1 package uk.ac.cam.ch.wwmm.opsin;
2 
3 import java.util.ArrayDeque;
4 import java.util.ArrayList;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Deque;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16 
17 import org.apache.log4j.Logger;
18 
19 import uk.ac.cam.ch.wwmm.opsin.IsotopeSpecificationParser.IsotopeSpecification;
20 
21 import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*;
22 import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*;
23 
24 /**
25  * Methods for processing the substitutive and additive operations that connect all the fragments together
26  * as well as indicated hydrogen/unsaturation/heteroatom replacement
27  * @author dl387
28  *
29  */
30 class StructureBuildingMethods {
31 	private static final Logger LOG = Logger.getLogger(StructureBuildingMethods.class);
32 	private static final Pattern matchCompoundLocant =Pattern.compile("[\\[\\(\\{](\\d+[a-z]?'*)[\\]\\)\\}]");
33 
StructureBuildingMethods()34 	private StructureBuildingMethods() {}
35 
36 	/**
37 	 * Resolves a word/bracket:
38 	 * Locanted attributes of words are resolved onto their group
39 	 * Locanted substitution is performed
40 	 * Connections involving multi radicals are processed
41 	 * Unlocanted attributes of words are resolved onto their group
42 	 *
43 	 * If word is a wordRule the function will instantly return
44 	 *
45 	 * @param state
46 	 * @param word
47 	 * @throws StructureBuildingException
48 	 */
resolveWordOrBracket(BuildState state, Element word)49 	static void resolveWordOrBracket(BuildState state, Element word) throws StructureBuildingException {
50 		if (word.getName().equals(WORDRULE_EL)){//already been resolved
51 			return;
52 		}
53 		if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){
54 			throw new StructureBuildingException("A word or bracket is the expected input");
55 		}
56 		recursivelyResolveLocantedFeatures(state, word);
57 		recursivelyResolveUnLocantedFeatures(state, word);
58 		//TODO check all things that can substitute have outAtoms
59 		//TOOD think whether you can avoid the need to have a cansubstitute function by only using appropriate group
60 		List<Element> subsBracketsAndRoots = OpsinTools.getDescendantElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
61 		for (Element subsBracketsAndRoot : subsBracketsAndRoots) {
62 			if (subsBracketsAndRoot.getAttribute(MULTIPLIER_ATR) != null) {
63 				throw new StructureBuildingException("Structure building problem: multiplier on :" + subsBracketsAndRoot.getName() + " was never used");
64 			}
65 		}
66 		List<Element> groups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL);
67 		for (int i = 0; i < groups.size(); i++) {
68 			Element group = groups.get(i);
69 			if (group.getAttribute(RESOLVED_ATR)==null && i != groups.size()-1){
70 				throw new StructureBuildingException("Structure building problem: Bond was not made from :" +group.getValue() + " but one should of been");
71 			}
72 		}
73 	}
74 
75 	/**
76 	 * Performs locanted attribute resolution
77 	 * then additive joining of fragments
78 	 * then locanted substitutive joining of fragments
79 	 *
80 	 * @param state
81 	 * @param word
82 	 * @throws StructureBuildingException
83 	 */
recursivelyResolveLocantedFeatures(BuildState state, Element word)84 	static void recursivelyResolveLocantedFeatures(BuildState state, Element word) throws StructureBuildingException {
85 		if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){
86 			throw new StructureBuildingException("A word or bracket is the expected input");
87 		}
88 		List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
89 		//substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence
90 		//due to multipliers being expanded will be in existence
91 		for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) {
92 			Element subBracketOrRoot = subsBracketsAndRoots.get(i);
93 			if (subBracketOrRoot.getName().equals(BRACKET_EL)){
94 				recursivelyResolveLocantedFeatures(state,subBracketOrRoot);
95 				if (potentiallyCanSubstitute(subBracketOrRoot)){
96 					performAdditiveOperations(state, subBracketOrRoot);
97 					performLocantedSubstitutiveOperations(state, subBracketOrRoot);
98 				}
99 			}
100 			else{
101 				resolveRootOrSubstituentLocanted(state, subBracketOrRoot);
102 			}
103 		}
104 	}
105 
106 	/**
107 	 * Performs locanted attribute resolution
108 	 * then additive joining of fragments
109 	 * then locanted substitutive joining of fragments
110 	 *
111 	 * @param state
112 	 * @param word
113 	 * @throws StructureBuildingException
114 	 */
recursivelyResolveUnLocantedFeatures(BuildState state, Element word)115 	static void recursivelyResolveUnLocantedFeatures(BuildState state, Element word) throws StructureBuildingException {
116 		if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){
117 			throw new StructureBuildingException("A word or bracket is the expected input");
118 		}
119 		List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
120 		//substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence
121 		//due to multipliers being expanded will be in existence
122 		for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) {
123 			Element subBracketOrRoot = subsBracketsAndRoots.get(i);
124 			if (subBracketOrRoot.getName().equals(BRACKET_EL)){
125 				recursivelyResolveUnLocantedFeatures(state,subBracketOrRoot);
126 				if (potentiallyCanSubstitute(subBracketOrRoot)){
127 					performUnLocantedSubstitutiveOperations(state, subBracketOrRoot);
128 				}
129 			}
130 			else{
131 				resolveRootOrSubstituentUnLocanted(state, subBracketOrRoot);
132 			}
133 		}
134 	}
135 
resolveRootOrSubstituentLocanted(BuildState state, Element subOrRoot)136 	static void resolveRootOrSubstituentLocanted(BuildState state, Element subOrRoot) throws StructureBuildingException {
137 
138 		resolveLocantedFeatures(state, subOrRoot);//e.g. unsaturators, hydro groups and heteroatom replacement
139 
140 		boolean foundSomethingToSubstitute = potentiallyCanSubstitute(subOrRoot);
141 
142 		if (foundSomethingToSubstitute){
143 			performAdditiveOperations(state, subOrRoot);//e.g. ethylenediimino, oxyethylene (operations where two outAtoms are used to produce the bond and no locant is required as groups)
144 			performLocantedSubstitutiveOperations(state, subOrRoot);//e.g. 2-methyltoluene
145 		}
146 	}
147 
resolveRootOrSubstituentUnLocanted(BuildState state, Element subOrRoot)148 	static void resolveRootOrSubstituentUnLocanted(BuildState state, Element subOrRoot) throws StructureBuildingException {
149 
150 		boolean foundSomethingToSubstitute = potentiallyCanSubstitute(subOrRoot);
151 
152 		resolveUnLocantedFeatures(state, subOrRoot);//e.g. unsaturators, hydro groups and heteroatom replacement
153 
154 		if (foundSomethingToSubstitute){
155 			performUnLocantedSubstitutiveOperations(state, subOrRoot);//e.g. tetramethylfuran
156 		}
157 	}
158 
159 
performLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot)160 	private static void performLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException {
161 		Element group;
162 		if (subBracketOrRoot.getName().equals(BRACKET_EL)) {
163 			group = findRightMostGroupInBracket(subBracketOrRoot);
164 		}
165 		else{
166 			group = subBracketOrRoot.getFirstChildElement(GROUP_EL);
167 		}
168 		if (group.getAttribute(RESOLVED_ATR) != null) {
169 			return;
170 		}
171 		Fragment frag = group.getFrag();
172 		if (frag.getOutAtomCount() >=1 && subBracketOrRoot.getAttribute(LOCANT_ATR) != null){
173 			String locantString = subBracketOrRoot.getAttributeValue(LOCANT_ATR);
174 			if (frag.getOutAtomCount() >1){
175 				checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group);
176 			}
177 			if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. 1,2-diethyl
178 				multiplyOutAndSubstitute(state, subBracketOrRoot);
179 			}
180 			else{
181 				Fragment parentFrag = findFragmentWithLocant(subBracketOrRoot, locantString);
182 				if (parentFrag == null){
183 					String modifiedLocant = checkForBracketedPrimedLocantSpecialCase(subBracketOrRoot, locantString);
184 					if (modifiedLocant != null){
185 						parentFrag = findFragmentWithLocant(subBracketOrRoot, modifiedLocant);
186 						if (parentFrag != null){
187 							locantString = modifiedLocant;
188 						}
189 					}
190 				}
191 				if (parentFrag==null){
192 					throw new StructureBuildingException("Cannot find in scope fragment with atom with locant " + locantString + ".");
193 				}
194 				group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
195 				Element groupToAttachTo = parentFrag.getTokenEl();
196 				if (groupToAttachTo.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null &&
197 						parentFrag.getOutAtomCount() > 0 &&
198 						groupToAttachTo.getAttribute(ISAMULTIRADICAL_ATR) != null &&
199 						parentFrag.getAtomByLocantOrThrow(locantString).getOutValency() > 0 &&
200 						frag.getOutAtom(0).getValency() == 1 &&
201 						parentFrag.getFirstAtom().equals(parentFrag.getAtomByLocantOrThrow(locantString))) {
202 					//horrible special case to allow C-hydroxycarbonimidoyl and the like
203 					//If additive nomenclature the first atom should be an out atom
204 					joinFragmentsAdditively(state, frag, parentFrag);
205 				}
206 				else{
207 					Atom atomToSubstituteAt = parentFrag.getAtomByLocantOrThrow(locantString);
208 					if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){
209 						if (atomToSubstituteAt.getElement() != ChemEl.O){
210 							for (Atom neighbour : atomToSubstituteAt.getAtomNeighbours()) {
211 								if (neighbour.getElement() == ChemEl.O &&
212 										neighbour.getBondCount()==1 &&
213 										neighbour.getFirstBond().getOrder() == 1 &&
214 										neighbour.getOutValency() == 0 &&
215 										neighbour.getCharge() == 0){
216 									atomToSubstituteAt = neighbour;
217 									break;
218 								}
219 							}
220 						}
221 					}
222 					joinFragmentsSubstitutively(state, frag, atomToSubstituteAt);
223 				}
224 			}
225 		}
226 	}
227 
performUnLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot)228 	private static void performUnLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException {
229 		Element group;
230 		if (subBracketOrRoot.getName().equals(BRACKET_EL)){
231 			group = findRightMostGroupInBracket(subBracketOrRoot);
232 		}
233 		else{
234 			group = subBracketOrRoot.getFirstChildElement(GROUP_EL);
235 		}
236 		if (group.getAttribute(RESOLVED_ATR) != null){
237 			return;
238 		}
239 		Fragment frag = group.getFrag();
240 		if (frag.getOutAtomCount() >= 1){
241 			if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){
242 				throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already have been performed!");
243 			}
244 			if (frag.getOutAtomCount() > 1){
245 				checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group);
246 			}
247 			if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. diethyl
248 				multiplyOutAndSubstitute(state, subBracketOrRoot);
249 			}
250 			else{
251 				if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) {
252 					performPerHalogenoSubstitution(state, frag, subBracketOrRoot);
253 				}
254 				else{
255 					List<Atom> atomsToJoinTo = null;
256 					if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){
257 						List<Fragment> possibleParents = findAlternativeFragments(subBracketOrRoot);
258 						for (Fragment fragment : possibleParents) {
259 							List<Atom> hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment);
260 							if (hydroxyAtoms.size() >= 1){
261 								atomsToJoinTo = hydroxyAtoms;
262 							}
263 							break;
264 						}
265 					}
266 					if (atomsToJoinTo == null) {
267 						atomsToJoinTo = findAtomsForSubstitution(subBracketOrRoot, 1, frag.getOutAtom(0).getValency());
268 					}
269 					if (atomsToJoinTo == null){
270 						throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!");
271 					}
272 					if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, 1)) {
273 						state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue());
274 					}
275 					joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0));
276 				}
277 				group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
278 			}
279 		}
280 	}
281 
282 	/**
283 	 * Clones the perhalogenFrag sufficiently to replace all in scope hydrogen with halogens.
284 	 * The cloned fragments are merged into the perhalogenFrag
285 	 * @param state
286 	 * @param perhalogenFrag
287 	 * @param subBracketOrRoot
288 	 * @throws StructureBuildingException
289 	 */
performPerHalogenoSubstitution(BuildState state, Fragment perhalogenFrag, Element subBracketOrRoot)290 	private static void performPerHalogenoSubstitution(BuildState state, Fragment perhalogenFrag, Element subBracketOrRoot) throws StructureBuildingException {
291 		List<Fragment> fragmentsToAttachTo = findAlternativeFragments(subBracketOrRoot);
292 		List<Atom> atomsToHalogenate = new ArrayList<Atom>();
293 		for (Fragment fragment : fragmentsToAttachTo) {
294 			FragmentTools.convertSpareValenciesToDoubleBonds(fragment);
295 			for (Atom atom : fragment.getAtomList()) {
296 				int substitutableHydrogen = calculateSubstitutableHydrogenAtoms(atom);
297 				if (substitutableHydrogen > 0 && FragmentTools.isCharacteristicAtom(atom)){
298 					continue;
299 				}
300 				for (int i = 0; i < substitutableHydrogen; i++) {
301 					atomsToHalogenate.add(atom);
302 				}
303 			}
304 		}
305 		if (atomsToHalogenate.size() == 0){
306 			throw new RuntimeException("Failed to find any substitutable hydrogen to apply " + perhalogenFrag.getTokenEl().getValue() + " to!");
307 		}
308 		List<Fragment> halogens = new ArrayList<Fragment>();
309 		halogens.add(perhalogenFrag);
310 		for (int i = 0; i < atomsToHalogenate.size() - 1; i++) {
311 			halogens.add(state.fragManager.copyFragment(perhalogenFrag));
312 		}
313 		for (int i = 0; i < atomsToHalogenate.size(); i++) {
314 			Fragment halogen = halogens.get(i);
315 			Atom from = halogen.getOutAtom(0).getAtom();
316 			halogen.removeOutAtom(0);
317 			state.fragManager.createBond(from, atomsToHalogenate.get(i), 1);
318 		}
319 		for (int i = 1; i < atomsToHalogenate.size(); i++) {
320 			state.fragManager.incorporateFragment(halogens.get(i), perhalogenFrag);
321 		}
322 	}
323 
324 	/**
325 	 * Multiplies out groups/brakets and substitutes them. The attribute "locant" is checked for locants
326 	 * If it is present it should contain a comma separated list of locants
327 	 * The strategy employed is to clone subOrBracket and its associated fragments as many times as the multiplier attribute
328 	 * perform(Un)LocantedSubstitutiveOperations is then called with on each call a different clone (or the original element) being in position
329 	 * Hence bonding between the clones is impossible
330 	 * @param state
331 	 * @param subOrBracket
332 	 * @throws StructureBuildingException
333 	 */
multiplyOutAndSubstitute(BuildState state, Element subOrBracket)334 	private static void multiplyOutAndSubstitute(BuildState state, Element subOrBracket) throws StructureBuildingException {
335 		Attribute multiplierAtr = subOrBracket.getAttribute(MULTIPLIER_ATR);
336 		int multiplier = Integer.parseInt(multiplierAtr.getValue());
337 		subOrBracket.removeAttribute(multiplierAtr);
338 		String[] locants = null;
339 		String locantsAtrValue = subOrBracket.getAttributeValue(LOCANT_ATR);
340 		if (locantsAtrValue != null){
341 			locants = locantsAtrValue.split(",");
342 		}
343 		Element parentWordOrBracket = subOrBracket.getParent();
344 		int indexOfSubOrBracket = parentWordOrBracket.indexOf(subOrBracket);
345 		subOrBracket.detach();
346 
347 		List<Element> elementsNotToBeMultiplied = new ArrayList<Element>();//anything before the multiplier in the sub/bracket
348 		Element multiplierEl = subOrBracket.getFirstChildElement(MULTIPLIER_EL);
349 		if (multiplierEl == null){
350 			throw new RuntimeException("Multiplier not found where multiplier expected");
351 		}
352 		for (int i = subOrBracket.indexOf(multiplierEl) -1 ; i >=0 ; i--) {
353 			Element el = subOrBracket.getChild(i);
354 			el.detach();
355 			elementsNotToBeMultiplied.add(el);
356 		}
357 		multiplierEl.detach();
358 
359 		List<Element> multipliedElements = new ArrayList<Element>();
360 		for (int i = multiplier - 1; i >=0; i--) {
361 			Element currentElement;
362 			if (i != 0){
363 				currentElement = state.fragManager.cloneElement(state, subOrBracket, i);
364 				addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building)
365 			}
366 			else{
367 				currentElement = subOrBracket;
368 			}
369 			multipliedElements.add(currentElement);
370 			if (locants != null){
371 				parentWordOrBracket.insertChild(currentElement, indexOfSubOrBracket);
372 				currentElement.getAttribute(LOCANT_ATR).setValue(locants[i]);
373 				performLocantedSubstitutiveOperations(state, currentElement);
374 				currentElement.detach();
375 			}
376 		}
377 		if (locants == null) {
378 			parentWordOrBracket.insertChild(multipliedElements.get(0), indexOfSubOrBracket);
379 			performUnlocantedSubstitutiveOperations(state, multipliedElements);
380 			multipliedElements.get(0).detach();
381 		}
382 		for (Element multipliedElement : multipliedElements) {//attach all the multiplied subs/brackets
383 			parentWordOrBracket.insertChild(multipliedElement, indexOfSubOrBracket);
384 		}
385 		for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original subOrBracket
386 			subOrBracket.insertChild(el, 0);
387 		}
388 	}
389 
performUnlocantedSubstitutiveOperations(BuildState state, List<Element> multipliedElements)390 	private static void performUnlocantedSubstitutiveOperations(BuildState state, List<Element> multipliedElements) throws StructureBuildingException {
391 		int numOfSubstituents = multipliedElements.size();
392 		Element subBracketOrRoot = multipliedElements.get(0);
393 		Element group;
394 		if (subBracketOrRoot.getName().equals(BRACKET_EL)){
395 			group = findRightMostGroupInBracket(subBracketOrRoot);
396 		}
397 		else{
398 			group = subBracketOrRoot.getFirstChildElement(GROUP_EL);
399 		}
400 		Fragment frag = group.getFrag();
401 		if (frag.getOutAtomCount() >= 1){
402 			if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){
403 				throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already been been performed!");
404 			}
405 			if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) {
406 				throw new StructureBuildingException(group.getValue() + " cannot be multiplied");
407 			}
408 			if (frag.getOutAtomCount() > 1){
409 				checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group);
410 			}
411 			List<Atom> atomsToJoinTo = null;
412 			if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){
413 				List<Fragment> possibleParents = findAlternativeFragments(subBracketOrRoot);
414 				for (Fragment fragment : possibleParents) {
415 					List<Atom> hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment);
416 					if (hydroxyAtoms.size() >= numOfSubstituents){
417 						atomsToJoinTo = hydroxyAtoms;
418 					}
419 					break;
420 				}
421 			}
422 			if (atomsToJoinTo == null) {
423 				atomsToJoinTo = findAtomsForSubstitution(subBracketOrRoot, numOfSubstituents, frag.getOutAtom(0).getValency());
424 			}
425 			if (atomsToJoinTo == null) {
426 				throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!");
427 			}
428 			if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, numOfSubstituents)) {
429 				state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue());
430 				List<Atom> atomsPreferredByEnvironment = AmbiguityChecker.useAtomEnvironmentsToGivePlausibleSubstitution(atomsToJoinTo, numOfSubstituents);
431 				if (atomsPreferredByEnvironment != null) {
432 					atomsToJoinTo = atomsPreferredByEnvironment;
433 				}
434 			}
435 
436 			joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0));
437 			group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
438 
439 			for (int i = 1; i < numOfSubstituents; i++) {
440 				subBracketOrRoot = multipliedElements.get(i);
441 				if (subBracketOrRoot.getName().equals(BRACKET_EL)){
442 					group = findRightMostGroupInBracket(subBracketOrRoot);
443 				}
444 				else{
445 					group = subBracketOrRoot.getFirstChildElement(GROUP_EL);
446 				}
447 				frag = group.getFrag();
448 				if (frag.getOutAtomCount() > 1){//TODO do this prior to multiplication?
449 					checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group);
450 				}
451 
452 				joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(i));
453 				group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
454 			}
455 		}
456 	}
457 
458 	/**
459 	 * Adds locanted unsaturators, heteroatoms and hydrogen elements to the group within the sub or root
460 	 * @param state
461 	 * @param subOrRoot
462 	 * @throws StructureBuildingException
463 	 */
resolveLocantedFeatures(BuildState state, Element subOrRoot)464 	static void resolveLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException {
465 		List<Element> groups = subOrRoot.getChildElements(GROUP_EL);
466 		if (groups.size() != 1){
467 			throw new StructureBuildingException("Each sub or root should only have one group element. This indicates a bug in OPSIN");
468 		}
469 		Element group = groups.get(0);
470 		Fragment thisFrag = group.getFrag();
471 
472 		List<Element> unsaturators = new ArrayList<Element>();
473 		List<Element> heteroatoms = new ArrayList<Element>();
474 		List<Element> hydrogenElements = new ArrayList<Element>();
475 		List<Element> subtractivePrefixElements = new ArrayList<Element>();
476 		List<Element> isotopeSpecifications = new ArrayList<Element>();
477 
478 		List<Element> children =subOrRoot.getChildElements();
479 		for (Element currentEl : children) {
480 			String elName =currentEl.getName();
481 			if (elName.equals(UNSATURATOR_EL)){
482 				unsaturators.add(currentEl);
483 			}
484 			else if (elName.equals(HETEROATOM_EL)){
485 				heteroatoms.add(currentEl);
486 			}
487 			else if (elName.equals(SUBTRACTIVEPREFIX_EL)){
488 				subtractivePrefixElements.add(currentEl);
489 			}
490 			else if (elName.equals(HYDRO_EL)){
491 				hydrogenElements.add(currentEl);
492 			}
493 			else if (elName.equals(INDICATEDHYDROGEN_EL)){
494 				hydrogenElements.add(currentEl);
495 			}
496 			else if (elName.equals(ADDEDHYDROGEN_EL)){
497 				hydrogenElements.add(currentEl);
498 			}
499 			else if (elName.equals(ISOTOPESPECIFICATION_EL)){
500 				isotopeSpecifications.add(currentEl);
501 			}
502 		}
503 		/*
504 		 * Add locanted functionality
505 		 */
506 
507 		List<Atom> atomsToDehydro = new ArrayList<Atom>();
508 		//locanted substitution can be assumed to be irrelevant to subtractive operations hence perform all subtractive operations now
509 		Map<ChemEl, Integer> unlocantedSubtractivePrefixes = new HashMap<ChemEl, Integer>();
510 
511 		for(int i = subtractivePrefixElements.size() -1; i >= 0; i--) {
512 			Element subtractivePrefix = subtractivePrefixElements.get(i);
513 			String type = subtractivePrefix.getAttributeValue(TYPE_ATR);
514 			if (type.equals(DEOXY_TYPE_VAL)){
515 				String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR);
516 				ChemEl chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR));
517 				if (locant == null) {
518 					Integer count = unlocantedSubtractivePrefixes.get(chemEl);
519 					unlocantedSubtractivePrefixes.put(chemEl, count != null ? count + 1 : 1);
520 				}
521 				else {
522 					applySubtractivePrefix(state, thisFrag, chemEl, locant);
523 				}
524 			}
525 			else if (type.equals(ANHYDRO_TYPE_VAL)){
526 				applyAnhydroPrefix(state, thisFrag, subtractivePrefix);
527 			}
528 			else if (type.equals(DEHYDRO_TYPE_VAL)){
529 				String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR);
530 				if(locant != null) {
531 					atomsToDehydro.add(thisFrag.getAtomByLocantOrThrow(locant));
532 				}
533 				else{
534 					throw new StructureBuildingException("locants are assumed to be required for the use of dehydro to be unambiguous");
535 				}
536 			}
537 			else{
538 				throw new StructureBuildingException("OPSIN bug: Unexpected subtractive prefix type: " + type);
539 			}
540 			subtractivePrefix.detach();
541 		}
542 		for (Entry<ChemEl, Integer> entry : unlocantedSubtractivePrefixes.entrySet()) {
543 			applyUnlocantedSubtractivePrefixes(state, thisFrag, entry.getKey(), entry.getValue());
544 		}
545 
546 		if (atomsToDehydro.size() > 0){
547 			boolean isCarbohydrateDehydro = false;
548 			if (group.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATE_TYPE_VAL)){
549 				Set<Atom> uniquifiedDehydroAtoms = new HashSet<Atom>(atomsToDehydro);
550 				if (uniquifiedDehydroAtoms.size()==atomsToDehydro.size()){//need to rule out case where dehydro is being used to form triple bonds on carbohydrates
551 					isCarbohydrateDehydro = true;
552 				}
553 			}
554 			if (isCarbohydrateDehydro){
555 				for (Atom a : atomsToDehydro) {
556 					List<Atom> hydroxyAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(a.getAtomNeighbours(), ChemEl.O);
557 					if (hydroxyAtoms.size() > 0){
558 						hydroxyAtoms.get(0).getFirstBond().setOrder(2);
559 					}
560 					else{
561 						throw new StructureBuildingException("atom with locant " + a.getFirstLocant() + " did not have a hydroxy group to convert to a ketose");
562 					}
563 				}
564 			}
565 			else{
566 				List<Atom> atomsToFormDoubleBonds = new ArrayList<Atom>();
567 				List<Atom> atomsToFormTripleBondsBetween = new ArrayList<Atom>();//dehydro on a double/aromatic bond forms a triple bond
568 
569 				for (Atom a : atomsToDehydro) {
570 					if (!a.hasSpareValency()){
571 						a.setSpareValency(true);
572 						atomsToFormDoubleBonds.add(a);
573 					}
574 					else{
575 						atomsToFormTripleBondsBetween.add(a);
576 					}
577 				}
578 
579 				for (Atom atom : atomsToFormDoubleBonds) {//check that all the dehydro-ed atoms are next to another atom with spare valency
580 					boolean hasSpareValency =false;
581 					for (Atom neighbour : atom.getAtomNeighbours()) {
582 						if (neighbour.hasSpareValency()){
583 							hasSpareValency = true;
584 							break;
585 						}
586 					}
587 					if (!hasSpareValency){
588 						throw new StructureBuildingException("Unexpected use of dehydro; two adjacent atoms were not unsaturated such as to form a double bond");
589 					}
590 				}
591 				addDehydroInducedTripleBonds(atomsToFormTripleBondsBetween);
592 			}
593 		}
594 
595 		for(int i=hydrogenElements.size() -1;i >= 0;i--) {
596 			Element hydrogen = hydrogenElements.get(i);
597 			String locant = hydrogen.getAttributeValue(LOCANT_ATR);
598 			if(locant != null) {
599 				Atom a =thisFrag.getAtomByLocantOrThrow(locant);
600 				if (a.hasSpareValency()){
601 					a.setSpareValency(false);
602 				}
603 				else{
604 					if (!acdNameSpiroIndicatedHydrogenBug(group, locant)){
605 						throw new StructureBuildingException("hydrogen addition at locant: " + locant +" was requested, but this atom is not unsaturated");
606 					}
607 				}
608 				hydrogenElements.remove(i);
609 				hydrogen.detach();
610 			}
611 		}
612 
613 		for(int i=unsaturators.size() -1;i >= 0;i--) {
614 			Element unsaturator = unsaturators.get(i);
615 			String locant = unsaturator.getAttributeValue(LOCANT_ATR);
616 			int bondOrder = Integer.parseInt(unsaturator.getAttributeValue(VALUE_ATR));
617 			if(bondOrder <= 1) {
618 				unsaturator.detach();
619 				continue;
620 			}
621 			if(locant != null) {
622 				unsaturators.remove(unsaturator);
623 				/*
624 				 * Is the locant a compound locant e.g. 1(6)
625 				 * This would indicate unsaturation between the atoms with locants 1 and 6
626 				 */
627 				Matcher matcher = matchCompoundLocant.matcher(locant);
628 				if (matcher.find()) {
629 					String compoundLocant = matcher.group(1);
630 					locant = matcher.replaceAll("");
631 					FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), compoundLocant, bondOrder, thisFrag);
632 				}
633 				else {
634 					FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), bondOrder, thisFrag);
635 				}
636 				unsaturator.detach();
637 			}
638 		}
639 
640 		for(int i=heteroatoms.size() -1;i >= 0;i--) {
641 			Element heteroatomEl = heteroatoms.get(i);
642 			String locant = heteroatomEl.getAttributeValue(LOCANT_ATR);
643 			if(locant != null) {
644 				Atom heteroatom = state.fragManager.getHeteroatom(heteroatomEl.getAttributeValue(VALUE_ATR));
645 				Atom atomToBeReplaced =thisFrag.getAtomByLocantOrThrow(locant);
646 				if (heteroatom.getElement() == atomToBeReplaced.getElement() && heteroatom.getCharge() == atomToBeReplaced.getCharge()){
647 					throw new StructureBuildingException("The replacement term " +heteroatomEl.getValue() +" was used on an atom that already is a " + heteroatom.getElement());
648 				}
649 				state.fragManager.replaceAtomWithAtom(thisFrag.getAtomByLocantOrThrow(locant), heteroatom, true);
650 				if (heteroatomEl.getAttribute(LAMBDA_ATR) != null){
651 					thisFrag.getAtomByLocantOrThrow(locant).setLambdaConventionValency(Integer.parseInt(heteroatomEl.getAttributeValue(LAMBDA_ATR)));
652 				}
653 				heteroatoms.remove(heteroatomEl);
654 				heteroatomEl.detach();
655 			}
656 		}
657 
658 		if (isotopeSpecifications.size() > 0) {
659 			applyIsotopeSpecifications(state, thisFrag, isotopeSpecifications, true);
660 		}
661 	}
662 
663 	/**
664 	 * ACD/Name has a known bug where it produces names in which a suffixed saturated ring in a polycyclic spiro
665 	 * is treated as if it is unsaturated and hence has indicated hydrogens
666 	 * e.g. 1',3'-dihydro-2H,5H-spiro[imidazolidine-4,2'-indene]-2,5-dione
667 	 * @param group
668 	 * @param indicatedHydrogenLocant
669 	 * @return
670 	 */
acdNameSpiroIndicatedHydrogenBug(Element group, String indicatedHydrogenLocant)671 	private static boolean acdNameSpiroIndicatedHydrogenBug(Element group, String indicatedHydrogenLocant) {
672 		if (group.getValue().startsWith("spiro")) {
673 			for (Element suffix : group.getParent().getChildElements(SUFFIX_EL)) {
674 				String suffixLocant = suffix.getAttributeValue(LOCANT_ATR);
675 				if (suffixLocant != null && suffixLocant.equals(indicatedHydrogenLocant)) {
676 					LOG.debug("Indicated hydrogen at " + indicatedHydrogenLocant + " ignored. Known bug in generated IUPAC name");
677 					return true;
678 				}
679 			}
680 		}
681 		return false;
682 	}
683 
684 	/**
685 	 * Removes a terminal atom of a particular element e.g. oxygen
686 	 * The locant specifies the atom adjacent to the atom to be removed
687 	 * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved
688 	 * @param state
689 	 * @param fragment
690 	 * @param chemEl
	 * @param locant The locant of the atom adjacent to the atom to be removed
692 	 * @throws StructureBuildingException
693 	 */
applySubtractivePrefix(BuildState state, Fragment fragment, ChemEl chemEl, String locant)694 	static void applySubtractivePrefix(BuildState state, Fragment fragment, ChemEl chemEl, String locant) throws StructureBuildingException {
695 		Atom adjacentAtom = fragment.getAtomByLocantOrThrow(locant);
696 		List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(adjacentAtom.getAtomNeighbours(), chemEl);
697 		if (applicableTerminalAtoms.isEmpty()) {
698 			throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " at locant "+ locant +" for subtractive nomenclature");
699 		}
700 		Atom atomToRemove = applicableTerminalAtoms.get(0);
701 		if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy
702 			for (int i = 0, len = fragment.getFunctionalAtomCount(); i < len; i++) {
703 				if (atomToRemove.equals(fragment.getFunctionalAtom(i).getAtom())) {
704 					fragment.removeFunctionalAtom(i);
705 					break;
706 				}
707 			}
708 			fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove));
709 		}
710 		FragmentTools.removeTerminalAtom(state, atomToRemove);
711 	}
712 
713 	/**
714 	 * Removes terminal atoms of a particular element e.g. oxygen
715 	 * The number to remove is decided by the count
716 	 * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved
717 	 * @param state
718 	 * @param fragment
719 	 * @param chemEl
720 	 * @param count
721 	 * @throws StructureBuildingException
722 	 */
applyUnlocantedSubtractivePrefixes(BuildState state, Fragment fragment, ChemEl chemEl, int count)723 	static void applyUnlocantedSubtractivePrefixes(BuildState state, Fragment fragment, ChemEl chemEl, int count) throws StructureBuildingException {
724 		List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(fragment.getAtomList(), chemEl);
725 		if (applicableTerminalAtoms.isEmpty() || applicableTerminalAtoms.size() < count) {
726 			throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature");
727 		}
728 		if (AmbiguityChecker.isSubstitutionAmbiguous(applicableTerminalAtoms, count)) {
729 			state.addIsAmbiguous("Group to remove with subtractive prefix");
730 		}
731 		for (int i = 0; i < count; i++) {
732 			Atom atomToRemove = applicableTerminalAtoms.get(i);
733 			if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy
734 				for (int j = 0, len = fragment.getFunctionalAtomCount(); j < len; j++) {
735 					if (atomToRemove.equals(fragment.getFunctionalAtom(j).getAtom())) {
736 						fragment.removeFunctionalAtom(j);
737 						break;
738 					}
739 				}
740 				fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove));
741 			}
742 			FragmentTools.removeTerminalAtom(state, atomToRemove);
743 		}
744 	}
745 
applyAnhydroPrefix(BuildState state, Fragment frag, Element subtractivePrefix)746 	private static void applyAnhydroPrefix(BuildState state, Fragment frag, Element subtractivePrefix) throws StructureBuildingException {
747 		ChemEl chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR));
748 		String locantStr = subtractivePrefix.getAttributeValue(LOCANT_ATR);
749 		if (locantStr == null) {
750 			throw new StructureBuildingException("Two locants are required before an anhydro prefix");
751 		}
752 		String[] locants = locantStr.split(",");
753 		Atom backBoneAtom1 = frag.getAtomByLocantOrThrow(locants[0]);
754 		Atom backBoneAtom2 = frag.getAtomByLocantOrThrow(locants[1]);
755 		List<Atom> applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom1.getAtomNeighbours(), chemEl);
756 		if (applicableTerminalAtoms.isEmpty()){
757 			throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature");
758 		}
759 		FragmentTools.removeTerminalAtom(state, applicableTerminalAtoms.get(0));
760 
761 		applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom2.getAtomNeighbours(), chemEl);
762 		if (applicableTerminalAtoms.isEmpty()){
763 			throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature");
764 		}
765 		state.fragManager.createBond(backBoneAtom1, applicableTerminalAtoms.get(0), 1);
766 	}
767 
768 	/**
	 * Attempts to form triple bonds between the atoms in atomsToFormTripleBondsBetween
770 	 * Throws an exception if the list contains duplicates or atoms with no adjacent atom in the list
771 	 * @param atomsToFormTripleBondsBetween
772 	 * @throws StructureBuildingException
773 	 */
addDehydroInducedTripleBonds(List<Atom> atomsToFormTripleBondsBetween)774 	private static void addDehydroInducedTripleBonds(List<Atom> atomsToFormTripleBondsBetween) throws StructureBuildingException {
775 		if (atomsToFormTripleBondsBetween.size()>0){
776 			Set<Atom> atoms = new HashSet<Atom>(atomsToFormTripleBondsBetween);
777 			if (atomsToFormTripleBondsBetween.size() != atoms.size()){
778 				throw new StructureBuildingException("locants specified for dehydro specify the same atom too many times");
779 			}
780 			atomLoop: for (int i = atomsToFormTripleBondsBetween.size()-1; i >=0; i = i-2) {//two atoms will have a triple bond formed betwen them
781 				Atom a = atomsToFormTripleBondsBetween.get(i);
782 				List<Atom> neighbours = a.getAtomNeighbours();
783 				for (Atom neighbour : neighbours) {
784 					if (atomsToFormTripleBondsBetween.contains(neighbour)){
785 						atomsToFormTripleBondsBetween.remove(i);
786 						atomsToFormTripleBondsBetween.remove(neighbour);
787 						Bond b = a.getBondToAtomOrThrow(neighbour);
788 						b.setOrder(3);
789 						a.setSpareValency(false);
790 						neighbour.setSpareValency(false);
791 						continue atomLoop;
792 					}
793 				}
794 				throw new StructureBuildingException("dehydro indicated atom should form a triple bond but no adjacent atoms also had hydrogen removed!");
795 			}
796 		}
797 	}
798 
799 	/**
	 * Applies the unlocanted unsaturators, heteroatoms and hydrogen elements to the group within the sub or root
801 	 * @param state
802 	 * @param subOrRoot
803 	 * @throws StructureBuildingException
804 	 */
resolveUnLocantedFeatures(BuildState state, Element subOrRoot)805 	static void resolveUnLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException {
806 		List<Element> groups = subOrRoot.getChildElements(GROUP_EL);
807 		if (groups.size() != 1){
808 			throw new StructureBuildingException("Each sub or root should only have one group element. This indicates a bug in OPSIN");
809 		}
810 		Fragment frag = groups.get(0).getFrag();
811 
812 		List<Integer> unsaturationBondOrders = new ArrayList<Integer>();
813 		List<Element> heteroatoms = new ArrayList<Element>();
814 		List<Element> hydrogenElements = new ArrayList<Element>();
815 		List<Element> isotopeSpecifications = new ArrayList<Element>();
816 
817 		List<Element> children = subOrRoot.getChildElements();
818 		for (Element currentEl : children) {
819 			String elName = currentEl.getName();
820 			if (elName.equals(UNSATURATOR_EL)) {
821 				int bondOrder = Integer.parseInt(currentEl.getAttributeValue(VALUE_ATR));
822 				if (bondOrder > 1) {
823 					unsaturationBondOrders.add(bondOrder);
824 				}
825 				currentEl.detach();
826 			}
827 			else if (elName.equals(HETEROATOM_EL)){
828 				heteroatoms.add(currentEl);
829 				currentEl.detach();
830 			}
831 			else if (elName.equals(HYDRO_EL) ||
832 				elName.equals(INDICATEDHYDROGEN_EL) ||
833 				elName.equals(ADDEDHYDROGEN_EL)){
834 				hydrogenElements.add(currentEl);
835 				currentEl.detach();
836 			}
837 			else if (elName.equals(ISOTOPESPECIFICATION_EL)){
838 				isotopeSpecifications.add(currentEl);
839 			}
840 		}
841 
842 		if (hydrogenElements.size() > 0) {
843 			applyUnlocantedHydro(state, frag, hydrogenElements);
844 		}
845 
846 		if (unsaturationBondOrders.size() > 0){
847 			unsaturateBonds(state, frag, unsaturationBondOrders);
848 		}
849 
850 		if (heteroatoms.size() > 0) {
851 			applyUnlocantedHeteroatoms(state, frag, heteroatoms);
852 		}
853 
854 		if (isotopeSpecifications.size() > 0) {
855 			applyIsotopeSpecifications(state, frag, isotopeSpecifications, false);
856 		}
857 
858 		if (frag.getOutAtomCount() > 0){//assign any outAtoms that have not been set to a specific atom to a specific atom
859 			for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) {
860 				OutAtom outAtom = frag.getOutAtom(i);
861 				if (!outAtom.isSetExplicitly()){
862 					outAtom.setAtom(findAtomForUnlocantedRadical(state, frag, outAtom));
863 					outAtom.setSetExplicitly(true);
864 				}
865 			}
866 		}
867 	}
868 
applyUnlocantedHydro(BuildState state, Fragment frag, List<Element> hydrogenElements)869 	private static void applyUnlocantedHydro(BuildState state, Fragment frag, List<Element> hydrogenElements) throws StructureBuildingException {
870 		/*
871 		 * This function is not entirely straightforward as certain atoms definitely should have their spare valency reduced
872 		 * However names are not consistent as to whether they bother having the hydro tags do this!
873 		 * The atoms in atomsWithSV are in atom order those that can take a hydro element and then those that shouldn't really take a hydro element as its absence is unambiguous
874 		 */
875 		List<Atom> atomsAcceptingHydroPrefix = new ArrayList<Atom>();
876 		Set<Atom> atomsWhichImplicitlyHadTheirSVRemoved = new HashSet<Atom>();
877 		List<Atom> atomList = frag.getAtomList();
878 		for (Atom atom : atomList) {
879 			if (atom.getType().equals(SUFFIX_TYPE_VAL)){
880 				continue;
881 			}
882 			atom.ensureSVIsConsistantWithValency(false);//doesn't take into account suffixes
883 			if (atom.hasSpareValency()) {
884 				atomsAcceptingHydroPrefix.add(atom);
885 				//if we take into account suffixes is the SV removed
886 				atom.ensureSVIsConsistantWithValency(true);
887 				if (!atom.hasSpareValency()) {
888 					atomsWhichImplicitlyHadTheirSVRemoved.add(atom);
889 				}
890 			}
891 		}
892 
893 		int hydrogenElsCount = hydrogenElements.size();
894 		for (Element hydrogenElement : hydrogenElements) {
895 			if (hydrogenElement.getValue().equals("perhydro")) {
896 				if (hydrogenElsCount != 1){
897 					throw new StructureBuildingException("Unexpected indication of hydrogen when perhydro makes such indication redundnant");
898 				}
899 				for (Atom atom : atomsAcceptingHydroPrefix) {
900 					atom.setSpareValency(false);
901 				}
902 				return;
903 			}
904 		}
905 
906 		List<Atom> atomsWithDefiniteSV = new ArrayList<Atom>();
907 		List<Atom> otherAtomsThatCanHaveHydro = new ArrayList<Atom>();
908 		for(Atom a : atomsAcceptingHydroPrefix) {
909 			if (atomsWhichImplicitlyHadTheirSVRemoved.contains(a)) {
910 				otherAtomsThatCanHaveHydro.add(a);
911 			}
912 			else {
913 				boolean canFormDoubleBond = false;
914 				for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) {
915 					if(aa.hasSpareValency()) {
916 						canFormDoubleBond = true;
917 						break;
918 					}
919 				}
920 				if (canFormDoubleBond) {
921 					atomsWithDefiniteSV.add(a);
922 				}
923 				else {
924 					otherAtomsThatCanHaveHydro.add(a);
925 				}
926 			}
927 		}
928 		List<Atom> prioritisedAtomsAcceptingHydro = new ArrayList<Atom>(atomsWithDefiniteSV);
929 		prioritisedAtomsAcceptingHydro.addAll(otherAtomsThatCanHaveHydro);//these end up at the end of the list
930 
931 		if (hydrogenElsCount > prioritisedAtomsAcceptingHydro.size()) {
932 			throw new StructureBuildingException("Cannot find atom to add hydrogen to (" +
933 					hydrogenElsCount + " hydrogens requested but only " + prioritisedAtomsAcceptingHydro.size() +" positions that can be hydrogenated)" );
934 		}
935 
936 		int svCountAfterRemoval = atomsWithDefiniteSV.size() - hydrogenElsCount;
937 		if (svCountAfterRemoval > 1) { //ambiguity likely. If it's 1 then an atom will be implicitly hydrogenated
938 			//NOTE: as hydrogens as added in pairs the unambiguous if one hydrogen is added and allow atoms are identical condition is unlikely to be ever satisfied
939 			if (!(AmbiguityChecker.allAtomsEquivalent(atomsWithDefiniteSV) &&
940 					(hydrogenElsCount == 1 || hydrogenElsCount == atomsWithDefiniteSV.size() - 1))) {
941 				state.addIsAmbiguous("Ambiguous choice of positions to add hydrogen to on " + frag.getTokenEl().getValue());
942 			}
943 		}
944 
945 		for (int i = 0; i < hydrogenElsCount; i++) {
946 			prioritisedAtomsAcceptingHydro.get(i).setSpareValency(false);
947 		}
948 	}
949 
unsaturateBonds(BuildState state, Fragment frag, List<Integer> unsaturationBondOrders)950 	private static void unsaturateBonds(BuildState state, Fragment frag, List<Integer> unsaturationBondOrders) throws StructureBuildingException {
951 		int tripleBonds = 0;
952 		int doublebonds = 0;
953 		for (Integer bondOrder : unsaturationBondOrders) {
954 			if (bondOrder == 3) {
955 				tripleBonds++;
956 			}
957 			else if (bondOrder == 2) {
958 				doublebonds++;
959 			}
960 			else {
961 				throw new RuntimeException("Unexpected unsaturation bon order: " + bondOrder);
962 			}
963 		}
964 
965 		if (tripleBonds > 0) {
966 			unsaturateBonds(state, frag, 3, tripleBonds);
967 		}
968 		if (doublebonds > 0) {
969 			unsaturateBonds(state, frag, 2, doublebonds);
970 		}
971 	}
972 
unsaturateBonds(BuildState state, Fragment frag, int bondOrder, int numToUnsaturate)973 	private static void unsaturateBonds(BuildState state, Fragment frag, int bondOrder, int numToUnsaturate) throws StructureBuildingException {
974 		List<Bond> bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, false);
975 		List<Bond> alternativeBondsThatCouldBeUnsaturated = Collections.emptyList();
976 		if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){
977 			bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, true);
978 		}
979 		else {
980 			alternativeBondsThatCouldBeUnsaturated = findAlternativeBondsToUnSaturate(frag, bondOrder, bondsThatCouldBeUnsaturated);
981 		}
982 		if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){
983 			throw new StructureBuildingException("Failed to find bond to change to a bond of order: " + bondOrder);
984 		}
985 		if (bondsThatCouldBeUnsaturated.size() > numToUnsaturate) {
986 			//by convention cycloalkanes can have one unsaturation implicitly at the 1 locant
987 			//terms like oxazoline are formally ambiguous but in practice the lowest locant is the one that will be intended (in this case 2-oxazoline)
988 			if (!isCycloAlkaneSpecialCase(frag, numToUnsaturate, bondsThatCouldBeUnsaturated) &&
989 					!HANTZSCHWIDMAN_SUBTYPE_VAL.equals(frag.getSubType())) {
990 				if (alternativeBondsThatCouldBeUnsaturated.size() >= numToUnsaturate) {
991 					List<Bond> allBonds = new ArrayList<Bond>(bondsThatCouldBeUnsaturated);
992 					allBonds.addAll(alternativeBondsThatCouldBeUnsaturated);
993 					if (!(AmbiguityChecker.allBondsEquivalent(allBonds) &&
994 							numToUnsaturate == 1 )) {
995 						state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue());
996 					}
997 				}
998 				else {
999 					if (!(AmbiguityChecker.allBondsEquivalent(bondsThatCouldBeUnsaturated) &&
1000 							(numToUnsaturate == 1 || numToUnsaturate == bondsThatCouldBeUnsaturated.size() - 1))){
1001 						state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue());
1002 					}
1003 				}
1004 			}
1005 		}
1006 		for (int i = 0; i < numToUnsaturate; i++) {
1007 			bondsThatCouldBeUnsaturated.get(i).setOrder(bondOrder);
1008 		}
1009 	}
1010 
isCycloAlkaneSpecialCase(Fragment frag, int numToUnsaturate, List<Bond> bondsThatCouldBeUnsaturated)1011 	private static boolean isCycloAlkaneSpecialCase(Fragment frag, int numToUnsaturate, List<Bond> bondsThatCouldBeUnsaturated) {
1012 		if (numToUnsaturate == 1) {
1013 			Bond b = bondsThatCouldBeUnsaturated.get(0);
1014 			Atom a1 = b.getFromAtom();
1015 			Atom a2 = b.getToAtom();
1016 			if ((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) &&
1017 					a1.getAtomIsInACycle() && a2.getAtomIsInACycle() &&
1018 					(a1.equals(frag.getFirstAtom()) || a2.equals(frag.getFirstAtom()))) {
1019 				//mono unsaturated cyclo alkanes are unambiguous e.g. cyclohexene
1020 				return true;
1021 			}
1022 		}
1023 		return false;
1024 	}
1025 
isCycloAlkaneHeteroatomSpecialCase(Fragment frag, int numHeteroatoms, List<Atom> atomsThatCouldBeReplaced)1026 	private static boolean isCycloAlkaneHeteroatomSpecialCase(Fragment frag, int numHeteroatoms, List<Atom> atomsThatCouldBeReplaced) {
1027 		if (numHeteroatoms == 1) {
1028 			if ((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) &&
1029 					frag.getFirstAtom().getAtomIsInACycle() && atomsThatCouldBeReplaced.get(0).equals(frag.getFirstAtom())) {
1030 				//single heteroatom implicitly goes to 1 position
1031 				return true;
1032 			}
1033 		}
1034 		return false;
1035 	}
1036 
1037 	private static class HeteroAtomSmilesAndLambda {
1038 		private final String smiles;
1039 		private final String lambdaConvention;
1040 
HeteroAtomSmilesAndLambda(String smiles, String lambdaConvention)1041 		public HeteroAtomSmilesAndLambda(String smiles, String lambdaConvention) {
1042 			this.smiles = smiles;
1043 			this.lambdaConvention = lambdaConvention;
1044 		}
1045 
1046 		@Override
hashCode()1047 		public int hashCode() {
1048 			final int prime = 31;
1049 			int result = 1;
1050 			result = prime
1051 					* result
1052 					+ ((lambdaConvention == null) ? 0 : lambdaConvention
1053 							.hashCode());
1054 			result = prime * result
1055 					+ ((smiles == null) ? 0 : smiles.hashCode());
1056 			return result;
1057 		}
1058 
1059 		@Override
equals(Object obj)1060 		public boolean equals(Object obj) {
1061 			if (this == obj)
1062 				return true;
1063 			if (obj == null)
1064 				return false;
1065 			if (getClass() != obj.getClass())
1066 				return false;
1067 			HeteroAtomSmilesAndLambda other = (HeteroAtomSmilesAndLambda) obj;
1068 			if (lambdaConvention == null) {
1069 				if (other.lambdaConvention != null)
1070 					return false;
1071 			} else if (!lambdaConvention.equals(other.lambdaConvention))
1072 				return false;
1073 			if (smiles == null) {
1074 				if (other.smiles != null)
1075 					return false;
1076 			} else if (!smiles.equals(other.smiles))
1077 				return false;
1078 			return true;
1079 		}
1080 
1081 
1082 	}
1083 
applyUnlocantedHeteroatoms(BuildState state, Fragment frag, List<Element> heteroatoms)1084 	private static void applyUnlocantedHeteroatoms(BuildState state, Fragment frag, List<Element> heteroatoms) throws StructureBuildingException {
1085 		Map<HeteroAtomSmilesAndLambda, Integer> heteroatomDescriptionToCount = new HashMap<HeteroAtomSmilesAndLambda, Integer>();
1086 		for (Element heteroatomEl : heteroatoms) {
1087 			String smiles = heteroatomEl.getAttributeValue(VALUE_ATR);
1088 			String lambdaConvention = heteroatomEl.getAttributeValue(LAMBDA_ATR);
1089 			HeteroAtomSmilesAndLambda desc = new HeteroAtomSmilesAndLambda(smiles, lambdaConvention);
1090 			Integer count = heteroatomDescriptionToCount.get(desc);
1091 			heteroatomDescriptionToCount.put(desc, count != null ? count + 1 : 1);
1092 		}
1093 		List<Atom> atomlist = frag.getAtomList();
1094 		for (Entry<HeteroAtomSmilesAndLambda, Integer> entry : heteroatomDescriptionToCount.entrySet()) {
1095 			HeteroAtomSmilesAndLambda desc = entry.getKey();
1096 			int replacementsRequired = entry.getValue();
1097 			Atom heteroatom = state.fragManager.getHeteroatom(desc.smiles);
1098 			ChemEl heteroatomChemEl = heteroatom.getElement();
1099 			//finds an atom for which changing it to the specified heteroatom will not cause valency to be violated
1100 			List<Atom> atomsThatCouldBeReplaced = new ArrayList<Atom>();
1101 			for (Atom atom : atomlist) {
1102 				if (atom.getType().equals(SUFFIX_TYPE_VAL)) {
1103 					continue;
1104 				}
1105 				if ((heteroatomChemEl.equals(atom.getElement()) && heteroatom.getCharge() == atom.getCharge())){
1106 					continue;//replacement would do nothing
1107 				}
1108 				if(atom.getElement() != ChemEl.C && heteroatomChemEl != ChemEl.C){
1109 					if (atom.getElement() == ChemEl.O && (heteroatomChemEl == ChemEl.S || heteroatomChemEl == ChemEl.Se || heteroatomChemEl == ChemEl.Te)) {
1110 						//by special case allow replacement of oxygen by chalcogen
1111 					}
1112 					else{
1113 						//replacement of heteroatom by another heteroatom
1114 						continue;
1115 					}
1116 				}
1117 				if (ValencyChecker.checkValencyAvailableForReplacementByHeteroatom(atom, heteroatom)) {
1118 					atomsThatCouldBeReplaced.add(atom);
1119 				}
1120 			}
1121 			if (atomsThatCouldBeReplaced.size() < replacementsRequired){
1122 				throw new StructureBuildingException("Cannot find suitable atom for heteroatom replacement");
1123 			}
1124 
1125 			if (atomsThatCouldBeReplaced.size() > replacementsRequired && !isCycloAlkaneHeteroatomSpecialCase(frag, replacementsRequired, atomsThatCouldBeReplaced)) {
1126 				if (!(AmbiguityChecker.allAtomsEquivalent(atomsThatCouldBeReplaced) &&
1127 						(replacementsRequired == 1 || replacementsRequired == atomsThatCouldBeReplaced.size() - 1))) {
1128 					//by convention cycloalkanes can have one unsaturation implicitly at the 1 locant
1129 					state.addIsAmbiguous("Heteroatom replacement on " + frag.getTokenEl().getValue());
1130 				}
1131 			}
1132 
1133 			for (int i = 0; i < replacementsRequired; i++) {
1134 				Atom atomToReplaceWithHeteroAtom = atomsThatCouldBeReplaced.get(i);
1135 				state.fragManager.replaceAtomWithAtom(atomToReplaceWithHeteroAtom, heteroatom, true);
1136 				if (desc.lambdaConvention != null) {
1137 					atomToReplaceWithHeteroAtom.setLambdaConventionValency(Integer.parseInt(desc.lambdaConvention));
1138 				}
1139 			}
1140 		}
1141 	}
1142 
applyIsotopeSpecifications(BuildState state, Fragment frag, List<Element> isotopeSpecifications, boolean applyLocanted)1143 	private static void applyIsotopeSpecifications(BuildState state, Fragment frag, List<Element> isotopeSpecifications, boolean applyLocanted) throws StructureBuildingException {
1144 		for(int i = isotopeSpecifications.size() - 1; i >= 0; i--) {
1145 			Element isotopeSpecification = isotopeSpecifications.get(i);
1146 			IsotopeSpecification isotopeSpec = IsotopeSpecificationParser.parseIsotopeSpecification(isotopeSpecification);
1147 			String[] locants = isotopeSpec.getLocants();
1148 			if(locants != null) {
1149 				if (!applyLocanted) {
1150 					continue;
1151 				}
1152 			}
1153 			else if (applyLocanted) {
1154 				continue;
1155 			}
1156 
1157 			ChemEl chemEl = isotopeSpec.getChemEl();
1158 			int isotope = isotopeSpec.getIsotope();
1159 			if(locants != null) {
1160 				if (chemEl == ChemEl.H) {
1161 					for (int j = 0; j < locants.length; j++) {
1162 						Atom atomWithHydrogenIsotope = frag.getAtomByLocantOrThrow(locants[j]);
1163 						Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag);
1164 						hydrogen.setIsotope(isotope);
1165 						state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1);
1166 					}
1167 				}
1168 				else {
1169 					for (int j = 0; j < locants.length; j++) {
1170 						Atom atom = frag.getAtomByLocantOrThrow(locants[j]);
1171 						if (chemEl != atom.getElement()) {
1172 							throw new StructureBuildingException("The atom at locant: " + locants[j]  + " was not a " + chemEl.toString() );
1173 						}
1174 						atom.setIsotope(isotope);
1175 					}
1176 				}
1177 			}
1178 			else {
1179 				int multiplier = isotopeSpec.getMultiplier();
1180 				if (chemEl == ChemEl.H) {
1181 					List<Atom> parentAtomsToApplyTo = FragmentTools.findnAtomsForSubstitution(frag, multiplier, 1);
1182 					if (parentAtomsToApplyTo == null){
1183 						throw new StructureBuildingException("Failed to find sufficient hydrogen atoms for unlocanted hydrogen isotope replacement");
1184 					}
1185 					if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) {
1186 						if (!casIsotopeAmbiguitySpecialCase(frag, parentAtomsToApplyTo, multiplier)) {
1187 							state.addIsAmbiguous("Position of hydrogen isotope on " + frag.getTokenEl().getValue());
1188 						}
1189 					}
1190 					for (int j = 0; j < multiplier; j++) {
1191 						Atom atomWithHydrogenIsotope = parentAtomsToApplyTo.get(j);
1192 						Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag);
1193 						hydrogen.setIsotope(isotope);
1194 						state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1);
1195 					}
1196 				}
1197 				else {
1198 					List<Atom> parentAtomsToApplyTo = new ArrayList<Atom>();
1199 					for (Atom atom : frag.getAtomList()) {
1200 						if (atom.getElement() == chemEl) {
1201 							parentAtomsToApplyTo.add(atom);
1202 						}
1203 					}
1204 					if (parentAtomsToApplyTo.size() < multiplier) {
1205 						throw new StructureBuildingException("Failed to find sufficient atoms for " + chemEl.toString() + " isotope replacement");
1206 					}
1207 					if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) {
1208 						state.addIsAmbiguous("Position of isotope on " + frag.getTokenEl().getValue());
1209 					}
1210 					for (int j = 0; j < multiplier; j++) {
1211 						parentAtomsToApplyTo.get(j).setIsotope(isotope);
1212 					}
1213 				}
1214 			}
1215 			isotopeSpecification.detach();
1216 		}
1217 	}
1218 
casIsotopeAmbiguitySpecialCase(Fragment frag, List<Atom> parentAtomsToApplyTo, int multiplier)1219 	private static boolean casIsotopeAmbiguitySpecialCase(Fragment frag, List<Atom> parentAtomsToApplyTo, int multiplier) throws StructureBuildingException {
1220 		if (multiplier !=1) {
1221 			return false;
1222 		}
1223 		List<Atom> atoms = frag.getAtomList();
1224 		Atom firstAtom = atoms.get(0);
1225 		if (!parentAtomsToApplyTo.get(0).equals(firstAtom)) {
1226 			return false;
1227 		}
1228 		ChemEl firstAtomEl = firstAtom.getElement();
1229 		if (atoms.size() ==2) {
1230 			if (firstAtomEl == atoms.get(1).getElement()) {
1231 				//e.g. ethane
1232 				return true;
1233 			}
1234 		}
1235 		else {
1236 			int intraFragValency = frag.getIntraFragmentIncomingValency(firstAtom);
1237 			boolean spareValency = firstAtom.hasSpareValency();
1238 			if (firstAtom.getAtomIsInACycle()) {
1239 				for (int i = 1; i < atoms.size(); i++) {
1240 					Atom atom = atoms.get(i);
1241 					if (atom.getElement() != firstAtomEl){
1242 						return false;
1243 					}
1244 					if (frag.getIntraFragmentIncomingValency(atom) != intraFragValency){
1245 						return false;
1246 					}
1247 					if (atom.hasSpareValency() != spareValency){
1248 						return false;
1249 					}
1250 				}
1251 				//e.g. benzene
1252 				return true;
1253 			}
1254 		}
1255 		return false;
1256 	}
1257 
findAtomForUnlocantedRadical(BuildState state, Fragment frag, OutAtom outAtom)1258 	static Atom findAtomForUnlocantedRadical(BuildState state, Fragment frag, OutAtom outAtom) throws StructureBuildingException {
1259 		List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(frag, outAtom.getAtom(), 1, outAtom.getValency(), true);
1260 		if (possibleAtoms == null){
1261 			throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency");
1262 		}
1263 		if (!((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && possibleAtoms.get(0).equals(frag.getFirstAtom()))) {
1264 			if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) {
1265 				state.addIsAmbiguous("Positioning of radical on: " + frag.getTokenEl().getValue());
1266 			}
1267 		}
1268 		return possibleAtoms.get(0);
1269 	}
1270 
1271 
findAlternativeBondsToUnSaturate(Fragment frag, int bondOrder, Collection<Bond> bondsToIgnore)1272 	private static List<Bond> findAlternativeBondsToUnSaturate(Fragment frag, int bondOrder, Collection<Bond> bondsToIgnore) {
1273 		return findBondsToUnSaturate(frag, bondOrder, false, new HashSet<Bond>(bondsToIgnore));
1274 	}
1275 
1276 	/**
	 * Finds bonds within the fragment that can have their bond order increased to the specified bond order
1278 	 * Depending on the value of allowAdjacentUnsaturatedBonds adjacent higher bonds are prevented
1279 	 * @param frag
1280 	 * @param bondOrder
1281 	 * @param allowAdjacentUnsaturatedBonds
1282 	 * @return
1283 	 */
findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds)1284 	static List<Bond> findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds) {
1285 		return findBondsToUnSaturate(frag, bondOrder, allowAdjacentUnsaturatedBonds, Collections.<Bond>emptySet());
1286 	}
1287 
findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds, Set<Bond> bondsToIgnore)1288 	private static List<Bond> findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds, Set<Bond> bondsToIgnore) {
1289 		List<Bond> bondsToUnsaturate = new ArrayList<Bond>();
1290 		mainLoop: for (Atom atom1 : frag.getAtomList()) {
1291 			if (atom1.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom1.getType()) || atom1.getProperty(Atom.ISALDEHYDE) !=null) {
1292 				continue;
1293 			}
1294 			List<Bond> bonds = atom1.getBonds();
1295 			int incomingValency = 0;
1296 			for (Bond bond : bonds) {
1297 				//don't place implicitly unsaturated bonds next to each other
1298 				if (bond.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) {
1299 					continue mainLoop;
1300 				}
1301 				if (bondsToUnsaturate.contains(bond)) {
1302 					if (!allowAdjacentUnsaturatedBonds) {
1303 						continue mainLoop;
1304 					}
1305 					incomingValency += bondOrder;
1306 				}
1307 				else {
1308 					incomingValency += bond.getOrder();
1309 				}
1310 			}
1311 
1312 			Integer maxVal = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom1);
1313 			if(maxVal != null && (incomingValency + (bondOrder - 1) + atom1.getOutValency()) > maxVal) {
1314 				continue;
1315 			}
1316 			bondLoop: for (Bond bond : bonds) {
1317 				if (bond.getOrder() == 1 && !bondsToUnsaturate.contains(bond) && !bondsToIgnore.contains(bond)) {
1318 					Atom atom2 = bond.getOtherAtom(atom1);
1319 					if (frag.getAtomByID(atom2.getID()) != null) {//check other atom is actually in the fragment!
1320 						if (atom2.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom2.getType()) || atom2.getProperty(Atom.ISALDEHYDE) !=null) {
1321 							continue;
1322 						}
1323 						int incomingValency2 = 0;
1324 						for (Bond bond2 : atom2.getBonds()) {
1325 							//don't place implicitly unsaturated bonds next to each other
1326 							if (bond2.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) {
1327 								continue bondLoop;
1328 							}
1329 							if (bondsToUnsaturate.contains(bond2)) {
1330 								if (!allowAdjacentUnsaturatedBonds) {
1331 									continue bondLoop;
1332 								}
1333 								incomingValency2 += bondOrder;
1334 							}
1335 							else {
1336 								incomingValency2 += bond2.getOrder();
1337 							}
1338 						}
1339 
1340 						Integer maxVal2 = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom2);
1341 						if(maxVal2 != null && (incomingValency2 + (bondOrder - 1) + atom2.getOutValency()) > maxVal2) {
1342 							continue;
1343 						}
1344 						bondsToUnsaturate.add(bond);
1345 						break bondLoop;
1346 					}
1347 				}
1348 			}
1349 		}
1350 		return bondsToUnsaturate;
1351 	}
1352 
1353 
1354 	/**
1355 	 * Return the lambda convention derived valency + protons if set
1356 	 * Otherwise if charge is 0 returns {@link ValencyChecker#getHWValency(ChemEl)}
1357 	 * Otherwise return {@link ValencyChecker#getMaximumValency(ChemEl, int)}
1358 	 * Returns null if the maximum valency is not known
1359 	 * @param a
1360 	 * @return
1361 	 */
getLambdaValencyOrHwValencyOrMaxValIfCharged(Atom a)1362 	static Integer getLambdaValencyOrHwValencyOrMaxValIfCharged(Atom a) {
1363 		if (a.getLambdaConventionValency() != null) {
1364 			return a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved();
1365 		}
1366 		else if (a.getCharge() == 0){
1367 			return ValencyChecker.getHWValency(a.getElement());
1368 		}
1369 		else {
1370 			return ValencyChecker.getMaximumValency(a.getElement(), a.getCharge());
1371 		}
1372 	}
1373 
performAdditiveOperations(BuildState state, Element subBracketOrRoot)1374 	private static void performAdditiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException {
1375 		if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){//additive nomenclature does not employ locants
1376 			return;
1377 		}
1378 		Element group;
1379 		if (subBracketOrRoot.getName().equals(BRACKET_EL)){
1380 			group =findRightMostGroupInBracket(subBracketOrRoot);
1381 		}
1382 		else{
1383 			group =subBracketOrRoot.getFirstChildElement(GROUP_EL);
1384 		}
1385 		if (group.getAttribute(RESOLVED_ATR) != null){
1386 			return;
1387 		}
1388 		Fragment frag = group.getFrag();
1389 		int outAtomCount = frag.getOutAtomCount();
1390 		if (outAtomCount >=1){
1391 			if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) ==null){
1392 				Element nextSiblingEl = OpsinTools.getNextSibling(subBracketOrRoot);
1393 				if (nextSiblingEl.getAttribute(MULTIPLIER_ATR) != null &&
1394 						(outAtomCount >= Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR)) || //probably multiplicative nomenclature, should be as many outAtoms as the multiplier
1395 						outAtomCount==1 && frag.getOutAtom(0).getValency()==Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR))) &&
1396 						hasRootLikeOrMultiRadicalGroup(nextSiblingEl)){
1397 					if (outAtomCount==1){//special case e.g. 4,4'-(benzylidene)dianiline
1398 						FragmentTools.splitOutAtomIntoValency1OutAtoms(frag.getOutAtom(0));
1399 						//special case where something like benzylidene is being used as if it meant benzdiyl for multiplicative nomenclature
1400 						//this is allowed in the IUPAC 79 recommendations but not recommended in the current recommendations
1401 					}
1402 					performMultiplicativeOperations(state, group, nextSiblingEl);
1403 				}
1404 				else if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){//additive nomenclature e.g. ethyleneoxy
1405 					Fragment nextFrag = getNextInScopeMultiValentFragment(subBracketOrRoot);
1406 					if (nextFrag != null){
1407 						Element nextMultiRadicalGroup = nextFrag.getTokenEl();
1408 						Element parentSubOrRoot = nextMultiRadicalGroup.getParent();
1409 						if (state.currentWordRule != WordRule.polymer){//imino does not behave like a substituent in polymers only as a linker
1410 							if (nextMultiRadicalGroup.getAttribute(IMINOLIKE_ATR) != null){//imino/methylene can just act as normal substituents, should an additive bond really be made???
1411 								Fragment adjacentFrag = OpsinTools.getNextGroup(subBracketOrRoot).getFrag();
1412 
1413 								if (nextFrag != adjacentFrag){//imino is not the absolute next frag
1414 									if (potentiallyCanSubstitute(nextMultiRadicalGroup.getParent()) || potentiallyCanSubstitute(nextMultiRadicalGroup.getParent().getParent())){
1415 										return;
1416 									}
1417 								}
1418 							}
1419 							if (group.getAttribute(IMINOLIKE_ATR) != null && levelsToWordEl(group) > levelsToWordEl(nextMultiRadicalGroup)){
1420 								return;//e.g. imino substitutes ((chloroimino)ethylene)dibenzene
1421 							}
1422 						}
1423 						if (parentSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){
1424 							throw new StructureBuildingException("Attempted to form additive bond to a multiplied component");
1425 						}
1426 						group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1427 						joinFragmentsAdditively(state, frag, nextFrag);
1428 					}
1429 				}
1430 				else {//e.g. chlorocarbonyl or hydroxy(sulfanyl)phosphoryl
1431 					List<Fragment> siblingFragments = findAlternativeFragments(subBracketOrRoot);
1432 					if (siblingFragments.size()>0){
1433 						Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1);
1434 						Element nextGroup = nextFrag.getTokenEl();
1435 						if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>1|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=1 )){
1436 							Atom toAtom = nextFrag.getOutAtom(0).getAtom();
1437 							if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){
1438 								group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1439 								joinFragmentsAdditively(state, frag, nextFrag);//e.g. aminocarbonyl or aminothio
1440 							}
1441 						}
1442 						if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){
1443 							for (int i = 0; i< siblingFragments.size()-1; i++) {
1444 								Fragment lastFrag = siblingFragments.get(i);
1445 								Element lastGroup = lastFrag.getTokenEl();
1446 								if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>1|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=1 )){
1447 									Atom toAtom = lastFrag.getOutAtom(0).getAtom();
1448 									if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){
1449 										group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1450 										joinFragmentsAdditively(state, frag, lastFrag);//e.g. hydroxy(sulfanyl)phosphoryl
1451 									}
1452 									break;
1453 								}
1454 
1455 								//loop may continue if lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it
1456 								if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) {
1457 									break;
1458 								}
1459 							}
1460 						}
1461 					}
1462 				}
1463 			}
1464 			else{// e.g. dimethoxyphosphoryl or bis(methylamino)phosphoryl
1465 				List<Fragment> siblingFragments = findAlternativeFragments(subBracketOrRoot);
1466 				if (siblingFragments.size()>0){
1467 					int multiplier = Integer.parseInt(subBracketOrRoot.getAttributeValue(MULTIPLIER_ATR));
1468 					Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1);
1469 					Element nextGroup = nextFrag.getTokenEl();
1470 					if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>=multiplier|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=multiplier +1 )){
1471 						Atom toAtom = nextFrag.getOutAtom(0).getAtom();
1472 						if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){
1473 							group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1474 							multiplyOutAndAdditivelyBond(state, subBracketOrRoot, nextFrag);//e.g.dihydroxyphosphoryl
1475 						}
1476 					}
1477 					if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){
1478 						for (int i = 0; i< siblingFragments.size()-1; i++) {
1479 							Fragment lastFrag = siblingFragments.get(i);
1480 							Element lastGroup = lastFrag.getTokenEl();
1481 							if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>=multiplier|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=multiplier +1 )){
1482 								Atom toAtom = lastFrag.getOutAtom(0).getAtom();
1483 								if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){
1484 									group.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1485 									multiplyOutAndAdditivelyBond(state, subBracketOrRoot, lastFrag);//e.g. dihydroxyphosphoryloxy
1486 								}
1487 								break;
1488 							}
1489 
1490 							//loop may continue if lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it
1491 							if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) {
1492 								break;
1493 							}
1494 						}
1495 					}
1496 				}
1497 			}
1498 		}
1499 	}
1500 
1501 	/**
1502 	 * Searches the input for something that either is a multiRadical or has no outAtoms i.e. not dimethyl
1503 	 * @param subBracketOrRoot
1504 	 * @return
1505 	 */
hasRootLikeOrMultiRadicalGroup(Element subBracketOrRoot)1506 	private static boolean hasRootLikeOrMultiRadicalGroup(Element subBracketOrRoot) {
1507 		List<Element> groups = OpsinTools.getDescendantElementsWithTagName(subBracketOrRoot, GROUP_EL);
1508 		if (subBracketOrRoot.getAttribute(INLOCANTS_ATR) != null){
1509 			return true;// a terminus with specified inLocants
1510 		}
1511 		for (Element group : groups) {
1512 			Fragment frag = group.getFrag();
1513 			int outAtomCount =frag.getOutAtomCount();
1514 			if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){
1515 				if (outAtomCount >=1 ){
1516 					return true;//a multi radical
1517 				}
1518 			}
1519 			else if (outAtomCount ==0 && group.getAttribute(RESOLVED_ATR)==null){
1520 				return true;// a terminus
1521 			}
1522 		}
1523 		return false;
1524 	}
1525 
1526 	/**
1527 	 * Multiply out subOrBracket and additively bond all substituents to the specified fragment
1528 	 * @param state
1529 	 * @param subOrBracket
1530 	 * @param fragToAdditivelyBondTo
1531 	 * @throws StructureBuildingException
1532 	 */
multiplyOutAndAdditivelyBond(BuildState state, Element subOrBracket, Fragment fragToAdditivelyBondTo)1533 	private static void multiplyOutAndAdditivelyBond(BuildState state, Element subOrBracket, Fragment fragToAdditivelyBondTo) throws StructureBuildingException {
1534 		int multiplier = Integer.parseInt(subOrBracket.getAttributeValue(MULTIPLIER_ATR));
1535 		subOrBracket.removeAttribute(subOrBracket.getAttribute(MULTIPLIER_ATR));
1536 		List<Element> clonedElements = new ArrayList<Element>();
1537 		List<Element> elementsNotToBeMultiplied = new ArrayList<Element>();//anything before the multiplier in the sub/bracket
1538 		for (int i = multiplier -1; i >=0; i--) {
1539 			Element currentElement;
1540 			if (i != 0){
1541 				currentElement = state.fragManager.cloneElement(state, subOrBracket, i);
1542 				addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building)
1543 				clonedElements.add(currentElement);
1544 			}
1545 			else{
1546 				currentElement = subOrBracket;
1547 				Element multiplierEl = subOrBracket.getFirstChildElement(MULTIPLIER_EL);
1548 				if (multiplierEl ==null){
1549 					throw new StructureBuildingException("Multiplier not found where multiplier expected");
1550 				}
1551 				for (int j = subOrBracket.indexOf(multiplierEl) -1 ; j >=0 ; j--) {
1552 					Element el = subOrBracket.getChild(j);
1553 					el.detach();
1554 					elementsNotToBeMultiplied.add(el);
1555 				}
1556 				multiplierEl.detach();
1557 			}
1558 			Element group;
1559 			if (currentElement.getName().equals(BRACKET_EL)){
1560 				group = findRightMostGroupInBracket(currentElement);
1561 			}
1562 			else{
1563 				group = currentElement.getFirstChildElement(GROUP_EL);
1564 			}
1565 			Fragment frag = group.getFrag();
1566 			if (frag.getOutAtomCount() != 1 ){
1567 				throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have one OutAtom in this case but had: "+ frag.getOutAtomCount());
1568 			}
1569 			joinFragmentsAdditively(state, frag, fragToAdditivelyBondTo);
1570 		}
1571 		for (Element clone : clonedElements) {//make sure cloned substituents don't substitute onto each other!
1572 			OpsinTools.insertAfter(subOrBracket, clone);
1573 		}
1574 		for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original subOrBracket
1575 			subOrBracket.insertChild(el, 0);
1576 		}
1577 	}
1578 
1579 	/**
1580 	 * Creates a build results from the input group for use as the input to the real performMultiplicativeOperations function
1581 	 * @param state
1582 	 * @param group
1583 	 * @param multipliedParent
1584 	 * @throws StructureBuildingException
1585 	 */
performMultiplicativeOperations(BuildState state, Element group, Element multipliedParent)1586 	private static void performMultiplicativeOperations(BuildState state, Element group, Element multipliedParent) throws StructureBuildingException{
1587 		BuildResults multiRadicalBR = new BuildResults(group.getParent());
1588 		performMultiplicativeOperations(state, multiRadicalBR, multipliedParent);
1589 	}
1590 
performMultiplicativeOperations(BuildState state, BuildResults multiRadicalBR, Element multipliedParent)1591 	private static void performMultiplicativeOperations(BuildState state, BuildResults multiRadicalBR, Element multipliedParent) throws StructureBuildingException {
1592 		int multiplier = Integer.parseInt(multipliedParent.getAttributeValue(MULTIPLIER_ATR));
1593 		if (multiplier != multiRadicalBR.getOutAtomCount()){
1594 			if (multiRadicalBR.getOutAtomCount() == multiplier*2){
1595 				//TODO substituents like nitrilo can have their outatoms combined
1596 			}
1597 			if (multiplier != multiRadicalBR.getOutAtomCount()){
1598 				throw new StructureBuildingException("Multiplication bond formation failure: number of outAtoms disagree with multiplier(multiplier: " + multiplier + ", outAtom count: " + multiRadicalBR.getOutAtomCount()+ ")");
1599 			}
1600 		}
1601 		if (LOG.isTraceEnabled()){LOG.trace(multiplier +" multiplicative bonds to be formed");}
1602 		multipliedParent.removeAttribute(multipliedParent.getAttribute(MULTIPLIER_ATR));
1603 		List<String> inLocants = null;
1604 		String inLocantsString = multipliedParent.getAttributeValue(INLOCANTS_ATR);
1605 		if (inLocantsString != null){//true for the root of a multiplicative name
1606 			if (inLocantsString.equals(INLOCANTS_DEFAULT)){
1607 				inLocants = new ArrayList<String>(multiplier);
1608 				for (int i = 0; i < multiplier; i++) {
1609 					inLocants.add(INLOCANTS_DEFAULT);
1610 				}
1611 			}
1612 			else{
1613 				inLocants = StringTools.arrayToList(inLocantsString.split(","));
1614 				if (inLocants.size() != multiplier){
1615 					throw new StructureBuildingException("Mismatch between multiplier and number of inLocants in multiplicative nomenclature");
1616 				}
1617 			}
1618 		}
1619 		List<Element> clonedElements = new ArrayList<Element>();
1620 		BuildResults newBr = new BuildResults();
1621 		for (int i = multiplier -1; i >=0; i--) {
1622 			Element multipliedElement;
1623 			if (i != 0){
1624 				multipliedElement = state.fragManager.cloneElement(state, multipliedParent, i);
1625 				addPrimesToLocantedStereochemistryElements(multipliedElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building)
1626 				clonedElements.add(multipliedElement);
1627 			}
1628 			else{
1629 				multipliedElement = multipliedParent;
1630 			}
1631 
1632 			//determine group that will be additively bonded to
1633 			Element multipliedGroup;
1634 			if (multipliedElement.getName().equals(BRACKET_EL)) {
1635 				multipliedGroup = getFirstMultiValentGroup(multipliedElement);
1636 				if (multipliedGroup == null){//root will not have a multivalent group
1637 					List<Element> groups = OpsinTools.getDescendantElementsWithTagName(multipliedElement, GROUP_EL);
1638 					if (inLocants == null){
1639 						throw new StructureBuildingException("OPSIN Bug? in locants must be specified for a multiplied root in multiplicative nomenclature");
1640 					}
1641 					if (inLocants.get(0).equals(INLOCANTS_DEFAULT)){
1642 						multipliedGroup = groups.get(groups.size() - 1);
1643 					}
1644 					else{
1645 						groupLoop: for (int j = groups.size()-1; j >=0; j--) {
1646 							Fragment possibleFrag = groups.get(j).getFrag();
1647 							for (String locant : inLocants) {
1648 								if (possibleFrag.hasLocant(locant)){
1649 									multipliedGroup = groups.get(j);
1650 									break groupLoop;
1651 								}
1652 							}
1653 						}
1654 					}
1655 					if (multipliedGroup == null){
1656 						throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!");
1657 					}
1658 				}
1659 			}
1660 			else{
1661 				multipliedGroup = multipliedElement.getFirstChildElement(GROUP_EL);
1662 			}
1663 			Fragment multipliedFrag = multipliedGroup.getFrag();
1664 
1665 			OutAtom multiRadicalOutAtom = multiRadicalBR.getOutAtom(i);
1666 			Fragment multiRadicalFrag = multiRadicalOutAtom.getAtom().getFrag();
1667 			Element multiRadicalGroup = multiRadicalFrag.getTokenEl();
1668 			if (multiRadicalGroup.getAttribute(RESOLVED_ATR) == null){
1669 				resolveUnLocantedFeatures(state, multiRadicalGroup.getParent());//the addition of unlocanted unsaturators can effect the position of radicals e.g. diazenyl
1670 				multiRadicalGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes"));
1671 			}
1672 
1673 			boolean substitutivelyBondedToRoot = false;
1674 			if (inLocants != null) {
1675 				Element rightMostGroup;
1676 				if (multipliedElement.getName().equals(BRACKET_EL)) {
1677 					rightMostGroup = findRightMostGroupInBracket(multipliedElement);
1678 				}
1679 				else{
1680 					rightMostGroup = multipliedElement.getFirstChildElement(GROUP_EL);
1681 				}
1682 				rightMostGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes"));//this group will not be used further within this word but can in principle be a substituent e.g. methylenedisulfonyl dichloride
1683 				if (multipliedGroup.getAttribute(ISAMULTIRADICAL_ATR) != null) {//e.g. methylenedisulfonyl dichloride
1684 					if (!multipliedParent.getAttributeValue(INLOCANTS_ATR).equals(INLOCANTS_DEFAULT)) {
1685 						throw new StructureBuildingException("inLocants should not be specified for a multiradical parent in multiplicative nomenclature");
1686 					}
1687 				}
1688 				else{
1689 					Atom from = multiRadicalOutAtom.getAtom();
1690 					int bondOrder = multiRadicalOutAtom.getValency();
1691 					//bonding will be substitutive rather additive as this is bonding to a root
1692 					Atom atomToJoinTo = null;
1693 					for (int j = inLocants.size() -1; j >=0; j--) {
1694 						String locant = inLocants.get(j);
1695 						if (locant.equals(INLOCANTS_DEFAULT)){//note that if one entry in inLocantArray is default then they all are "default"
1696 							List<Atom> possibleAtoms = getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(multipliedGroup, bondOrder, i);
1697 							if (possibleAtoms.isEmpty()) {
1698 								throw new StructureBuildingException("No suitable atom found for multiplicative operation");
1699 							}
1700 							if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) {
1701 								state.addIsAmbiguous("Connection to multiplied group: " + multipliedGroup.getValue());
1702 							}
1703 							atomToJoinTo = possibleAtoms.get(0);
1704 							inLocants.remove(j);
1705 							break;
1706 						}
1707 						else{
1708 							Atom inAtom = multipliedFrag.getAtomByLocant(locant);
1709 							if (inAtom != null) {
1710 								atomToJoinTo = inAtom;
1711 								inLocants.remove(j);
1712 								break;
1713 							}
1714 						}
1715 					}
1716 					if (atomToJoinTo == null){
1717 						throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!");
1718 					}
1719 
1720 					if (!multiRadicalOutAtom.isSetExplicitly()) {//not set explicitly so may be an inappropriate atom
1721 						from = findAtomForUnlocantedRadical(state, from.getFrag(), multiRadicalOutAtom);
1722 					}
1723 					multiRadicalFrag.removeOutAtom(multiRadicalOutAtom);
1724 
1725 					state.fragManager.createBond(from, atomToJoinTo, bondOrder);
1726 					if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded (multiplicative to root) " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");}
1727 					substitutivelyBondedToRoot = true;
1728 				}
1729 			}
1730 			if (!substitutivelyBondedToRoot) {
1731 				joinFragmentsAdditively(state, multiRadicalFrag, multipliedFrag);
1732 			}
1733 			if (multipliedElement.getName().equals(BRACKET_EL)) {
1734 				recursivelyResolveUnLocantedFeatures(state, multipliedElement);//there may be outAtoms that are involved in unlocanted substitution, these can be safely used now e.g. ...bis((3-hydroxy-4-methoxyphenyl)methylene) where (3-hydroxy-4-methoxyphenyl)methylene is the currentElement
1735 			}
1736 
1737 			if (inLocants == null) {
1738 				//currentElement is not a root element. Need to build up a new BuildResults so as to call performMultiplicativeOperations again
1739 				//at this stage an outAtom has been removed from the fragment within currentElement through an additive bond
1740 				newBr.mergeBuildResults(new BuildResults(multipliedElement));
1741 			}
1742 		}
1743 
1744 		if (newBr.getFragmentCount() == 1) {
1745 			throw new StructureBuildingException("Multiplicative nomenclature cannot yield only one temporary terminal fragment");
1746 		}
1747 		if (newBr.getFragmentCount() >= 2) {
1748 			List<Element> siblings = OpsinTools.getNextSiblingsOfTypes(multipliedParent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});
1749 			if (siblings.size() == 0) {
1750 				Element parentOfMultipliedEl = multipliedParent.getParent();
1751 				if (parentOfMultipliedEl.getName().equals(BRACKET_EL)) {//brackets are allowed
1752 					siblings = OpsinTools.getNextSiblingsOfTypes(parentOfMultipliedEl, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});
1753 					if (siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) {
1754 						throw new StructureBuildingException("Multiplier not found where multiplier was expected for succesful multiplicative nomenclature");
1755 					}
1756 					performMultiplicativeOperations(state, newBr, siblings.get(0));
1757 				}
1758 				else{
1759 					throw new StructureBuildingException("Could not find suitable element to continue multiplicative nomenclature");
1760 				}
1761 			}
1762 			else{
1763 				if (siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) {
1764 					throw new StructureBuildingException("Multiplier not found where multiplier was expected for successful multiplicative nomenclature");
1765 				}
1766 				performMultiplicativeOperations(state, newBr, siblings.get(0));
1767 			}
1768 		}
1769 
1770 		for (Element clone : clonedElements) {//only insert cloned substituents now so they don't substitute onto each other!
1771 			OpsinTools.insertAfter(multipliedParent, clone);
1772 		}
1773 	}
1774 
1775 	/**
1776 	 * Applies special case to prefer the end of chains with the usableAsAJoiner attributes cf. p-phenylenedipropionic acid
1777 	 * Such cases will still be considered to be formally ambiguous
1778 	 * @param multipliedGroup
1779 	 * @param multipliedFrag
1780 	 * @param bondOrder
1781 	 * @param primesAdded
1782 	 * @return
1783 	 * @throws StructureBuildingException
1784 	 */
getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(Element multipliedGroup, int bondOrder, int primesAdded)1785 	private static List<Atom> getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(Element multipliedGroup, int bondOrder, int primesAdded) throws StructureBuildingException {
1786 		Fragment multipliedFrag = multipliedGroup.getFrag();
1787 		if ("yes".equals(multipliedGroup.getAttributeValue(USABLEASJOINER_ATR)) && multipliedFrag.getDefaultInAtom() == null) {
1788 			Element previous = OpsinTools.getPrevious(multipliedGroup);
1789 			if (previous != null && previous.getName().equals(MULTIPLIER_EL)){
1790 				String locant = getLocantOfEndOfChainIfGreaterThan1(multipliedFrag, primesAdded);
1791 				if (locant != null) {
1792 					Atom preferredAtom = multipliedFrag.getAtomByLocantOrThrow(locant);
1793 					List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(multipliedFrag.getAtomList(), preferredAtom, 1, bondOrder, true);
1794 					if (possibleAtoms == null) {
1795 						possibleAtoms = Collections.emptyList();
1796 					}
1797 					return possibleAtoms;
1798 				}
1799 			}
1800 		}
1801 		return FragmentTools.findSubstituableAtoms(multipliedFrag, bondOrder);
1802 	}
1803 
getLocantOfEndOfChainIfGreaterThan1(Fragment frag, int primes)1804 	private static String getLocantOfEndOfChainIfGreaterThan1(Fragment frag, int primes) {
1805 		String primesStr = StringTools.multiplyString("'", primes);
1806 		int length = 0;
1807 		Atom next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr);
1808 		Atom previous = null;
1809 		while (next != null){
1810 			if (previous != null && previous.getBondToAtom(next) == null){
1811 				break;
1812 			}
1813 			length++;
1814 			previous = next;
1815 			next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr);
1816 		}
1817 		if (length > 1){
1818 			return Integer.toString(length) + primesStr;
1819 		}
1820 		return null;
1821 	}
1822 
1823 	/**
1824 	 * Given a subsituent/bracket finds the next multi valent substituent/root that is in scope and hence its group
1825 	 * e.g. for oxy(dichloromethyl)methylene given oxy substituent the methylene group would be found
1826 	 * for oxy(dichloroethylene) given oxy substituent the ethylene group would be found
1827 	 * for oxy(carbonylimino) given oxy carbonyl would be found
1828 	 * @param substituentOrBracket
1829 	 * @return frag
1830 	 * @throws StructureBuildingException
1831 	 */
getNextInScopeMultiValentFragment(Element substituentOrBracket)1832 	private static Fragment getNextInScopeMultiValentFragment(Element substituentOrBracket) throws StructureBuildingException {
1833 		if (!substituentOrBracket.getName().equals(SUBSTITUENT_EL) && !substituentOrBracket.getName().equals(BRACKET_EL)){
1834 			throw new StructureBuildingException("Input to this function should be a substituent or bracket");
1835 		}
1836 		if (substituentOrBracket.getParent()==null){
1837 			throw new StructureBuildingException("substituent did not have a parent!");
1838 		}
1839 		Element parent = substituentOrBracket.getParent();
1840 
1841 		List<Element> children = OpsinTools.getChildElementsWithTagNames(parent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});//will be returned in index order
1842 		int indexOfSubstituent =parent.indexOf(substituentOrBracket);
1843 		for (Element child : children) {
1844 			if (parent.indexOf(child) <=indexOfSubstituent){//only want things after the input
1845 				continue;
1846 			}
1847 			if (child.getAttribute(MULTIPLIER_ATR) != null){
1848 				continue;
1849 			}
1850 			List<Element> childDescendants;
1851 			if (child.getName().equals(BRACKET_EL)){
1852 				childDescendants = OpsinTools.getDescendantElementsWithTagNames(child, new String[]{SUBSTITUENT_EL, ROOT_EL});//will be returned in depth-first order
1853 			}
1854 			else{
1855 				childDescendants =new ArrayList<Element>();
1856 				childDescendants.add(child);
1857 			}
1858 			for (Element descendantChild : childDescendants) {
1859 				Element group = descendantChild.getFirstChildElement(GROUP_EL);
1860 				if (group == null){
1861 					throw new StructureBuildingException("substituent/root is missing its group");
1862 				}
1863 				Fragment possibleFrag = group.getFrag();
1864 				if (group.getAttribute(ISAMULTIRADICAL_ATR) != null &&
1865 						(possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){
1866 					return possibleFrag;
1867 				}
1868 			}
1869 		}
1870 		return null;
1871 	}
1872 
1873 	/**
1874 	 * Given a bracket searches in a depth first manner for the first multi valent group
1875 	 * @param bracket
1876 	 * @return group
1877 	 * @throws StructureBuildingException
1878 	 */
getFirstMultiValentGroup(Element bracket)1879 	private static Element getFirstMultiValentGroup(Element bracket) throws StructureBuildingException {
1880 		if (!bracket.getName().equals(BRACKET_EL)){
1881 			throw new StructureBuildingException("Input to this function should be a bracket");
1882 		}
1883 
1884 		List<Element> groups = OpsinTools.getDescendantElementsWithTagName(bracket, GROUP_EL);//will be returned in index order
1885 		for (Element group : groups) {
1886 			Fragment possibleFrag = group.getFrag();
1887 			if (group.getAttribute(ISAMULTIRADICAL_ATR) != null &&
1888 					(possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){
1889 				return group;
1890 			}
1891 		}
1892 		return null;
1893 	}
1894 
	/**
	 * Forms a single bond between an outAtom of fragToBeJoined and an outAtom of parentFrag.
	 * The outAtom consumed on parentFrag determines the bond order; a matching outAtom is then
	 * located (or assembled by merging outAtoms) on fragToBeJoined. Both consumed outAtoms are
	 * removed from their fragments before the bond is created through state.fragManager.
	 * @param state Provides the fragManager used to create the bond; also passed to findAtomForUnlocantedRadical
	 * @param fragToBeJoined The fragment whose outAtom supplies the "from" end of the bond
	 * @param parentFrag The fragment whose outAtom supplies the "to" end of the bond
	 * @throws StructureBuildingException If an epoxy-like fragment has a locanted outAtom, if either fragment
	 * has no outAtoms, or if no outAtom (or combination of outAtoms) on fragToBeJoined matches the bond order
	 */
	private static void joinFragmentsAdditively(BuildState state, Fragment fragToBeJoined, Fragment parentFrag) throws StructureBuildingException {
		Element elOfFragToBeJoined = fragToBeJoined.getTokenEl();
		//an epoxy-like fragment with a locant on any of its outAtoms is rejected for additive bonding
		if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){
			for (int i = 0, l = fragToBeJoined.getOutAtomCount(); i < l; i++) {
				OutAtom outAtom = fragToBeJoined.getOutAtom(i);
				if (outAtom.getLocant() != null){
					throw new StructureBuildingException("Inappropriate use of " + elOfFragToBeJoined.getValue());
				}
			}
		}
		int outAtomCountOnFragToBeJoined = fragToBeJoined.getOutAtomCount();
		if (outAtomCountOnFragToBeJoined ==0){
			throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have at least one OutAtom but had none");
		}

		//same message as above, but here it is the parent fragment that lacks an outAtom
		if (parentFrag.getOutAtomCount() == 0){
			throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have at least one OutAtom but had none");
		}
		//"in" is the outAtom on the parent that will be consumed; when nothing below selects one the parent's first outAtom is used
		OutAtom in = null;
		if (parentFrag.getOutAtomCount() > 1){
			//step 1: do all of the parent's outAtoms have the same valency?
			int firstOutAtomOrder = parentFrag.getOutAtom(0).getValency();
			boolean unresolvedAmbiguity =false;
			for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) {
				OutAtom outAtom = parentFrag.getOutAtom(i);
				if (outAtom.getValency() != firstOutAtomOrder){
					unresolvedAmbiguity =true;
				}
			}
			if (unresolvedAmbiguity){//not all outAtoms on parent equivalent
				//step 2: do all of the outAtoms on the fragment to be joined have the same valency?
				//NB firstOutAtomOrder now refers to the first outAtom of fragToBeJoined rather than of the parent
				firstOutAtomOrder = fragToBeJoined.getOutAtom(0).getValency();
				unresolvedAmbiguity =false;
				for (int i = 1, l = fragToBeJoined.getOutAtomCount(); i < l; i++) {
					OutAtom outAtom = fragToBeJoined.getOutAtom(i);
					if (outAtom.getValency() != firstOutAtomOrder){
						unresolvedAmbiguity =true;
					}
				}
				if (unresolvedAmbiguity && outAtomCountOnFragToBeJoined == 2){//not all outAtoms on frag to be joined are equivalent either!
					//Solves the specific case of 2,2'-[ethane-1,2-diylbis(azanylylidenemethanylylidene)]diphenol vs 2,2'-[ethane-1,2-diylidenebis(azanylylidenemethanylylidene)]bis(cyclohexan-1-ol)
					//but does not solve the general case as only a single look behind is performed.
					Element previousGroup = OpsinTools.getPreviousGroup(elOfFragToBeJoined);
					if (previousGroup != null){
						Fragment previousFrag = previousGroup.getFrag();
						if (previousFrag.getOutAtomCount() > 1){
							//step 3: check whether the previous group's outAtoms all share one valency
							int previousGroupFirstOutAtomOrder = previousFrag.getOutAtom(0).getValency();
							unresolvedAmbiguity =false;
							for (int i = 1, l = previousFrag.getOutAtomCount(); i < l; i++) {
								OutAtom outAtom = previousFrag.getOutAtom(i);
								if (outAtom.getValency() != previousGroupFirstOutAtomOrder){
									unresolvedAmbiguity =true;
								}
							}
							//if that shared valency equals the valency of the parent's first outAtom,
							//consume the first later outAtom on the parent that has a different valency
							if (!unresolvedAmbiguity && previousGroupFirstOutAtomOrder==parentFrag.getOutAtom(0).getValency()){
								for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) {
									OutAtom outAtom = parentFrag.getOutAtom(i);
									if (outAtom.getValency() != previousGroupFirstOutAtomOrder){
										in = outAtom;
										break;
									}
								}
							}
						}
					}
				}
				else{
					//reached when fragToBeJoined's outAtoms all share a valency, or they differ but there are not exactly two of them:
					//consume the first outAtom on the parent whose valency equals that of fragToBeJoined's first outAtom
					for (int i = 0, l = parentFrag.getOutAtomCount(); i < l; i++) {
						OutAtom outAtom = parentFrag.getOutAtom(i);
						if (outAtom.getValency()==firstOutAtomOrder){
							in = outAtom;
							break;
						}
					}
				}
			}
		}
		if (in==null){
			in = parentFrag.getOutAtom(0);
		}
		Atom to = in.getAtom();
		//the valency of the consumed parent outAtom fixes the order of the bond that will be formed
		int bondOrder = in.getValency();
		if (!in.isSetExplicitly()){//not set explicitly so may be an inappropriate atom
			to = findAtomForUnlocantedRadical(state, to.getFrag(), in);
		}
		parentFrag.removeOutAtom(in);

		OutAtom out =null;

		//search from the last outAtom backwards for one whose valency equals the bond order
		for (int i =outAtomCountOnFragToBeJoined -1; i>=0; i--) {
			if (fragToBeJoined.getOutAtom(i).getValency() == bondOrder){
				out = fragToBeJoined.getOutAtom(i);
				break;
			}
		}

		if (out ==null){
			if (outAtomCountOnFragToBeJoined >=bondOrder){//handles cases like nitrilo needing to be -N= (remove later outAtoms first as per usual)
				//walk backwards summing valencies; each outAtom visited before the sum reaches bondOrder is removed,
				//and the outAtom at which the sum matches is given the summed valency and used as "out"
				int valency =0;
				Atom lastOutAtom = fragToBeJoined.getOutAtom(outAtomCountOnFragToBeJoined -1).getAtom();
				for (int i =outAtomCountOnFragToBeJoined -1; i >= 0; i--) {
					OutAtom nextOutAtom = fragToBeJoined.getOutAtom(i);
					if (nextOutAtom.getAtom() != lastOutAtom){
						//only outAtoms located on the same atom as the last outAtom may be merged
						throw new StructureBuildingException("Additive bond formation failure: bond order disagreement");
					}
					valency += nextOutAtom.getValency();
					if (valency==bondOrder){
						nextOutAtom.setValency(valency);
						out = nextOutAtom;
						break;
					}
					fragToBeJoined.removeOutAtom(nextOutAtom);
				}
				if (out==null){
					throw new StructureBuildingException("Additive bond formation failure: bond order disagreement");
				}
			}
			else{
				throw new StructureBuildingException("Additive bond formation failure: bond order disagreement");
			}
		}

		Atom from = out.getAtom();
		if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom
			from = findAtomForUnlocantedRadical(state, from.getFrag(), out);
		}
		fragToBeJoined.removeOutAtom(out);

		state.fragManager.createBond(from, to, bondOrder);
		if (LOG.isTraceEnabled()){LOG.trace("Additively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + to.getID() + " (" + to.getFrag().getTokenEl().getValue() + ")" );}
	}
2024 
joinFragmentsSubstitutively(BuildState state, Fragment fragToBeJoined, Atom atomToJoinTo)2025 	private static void joinFragmentsSubstitutively(BuildState state, Fragment fragToBeJoined, Atom atomToJoinTo) throws StructureBuildingException {
2026 		Element elOfFragToBeJoined = fragToBeJoined.getTokenEl();
2027 		if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){
2028 			formEpoxide(state, fragToBeJoined, atomToJoinTo);
2029 			return;
2030 		}
2031 		int outAtomCount = fragToBeJoined.getOutAtomCount();
2032 		if (outAtomCount >1){
2033 			throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had: "+ outAtomCount);
2034 		}
2035 		if (outAtomCount ==0 ){
2036 			throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had none");
2037 		}
2038 		if (elOfFragToBeJoined.getAttribute(IMINOLIKE_ATR) != null){//special case for methylene/imino
2039 			if (fragToBeJoined.getOutAtomCount()==1 && fragToBeJoined.getOutAtom(0).getValency()==1 ){
2040 				fragToBeJoined.getOutAtom(0).setValency(2);
2041 			}
2042 		}
2043 		OutAtom out = fragToBeJoined.getOutAtom(0);
2044 		Atom from = out.getAtom();
2045 		int bondOrder = out.getValency();
2046 		if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom
2047 			List<Atom> possibleAtoms = FragmentTools.findnAtomsForSubstitution(fragToBeJoined.getAtomList(), from, 1, bondOrder, false);
2048 			if (possibleAtoms == null){
2049 				throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency");
2050 			}
2051 			if (!((ALKANESTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType())) && possibleAtoms.get(0).equals(fragToBeJoined.getFirstAtom()))) {
2052 				if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) {
2053 					state.addIsAmbiguous("Positioning of radical on: " + fragToBeJoined.getTokenEl().getValue());
2054 				}
2055 			}
2056 			from = possibleAtoms.get(0);
2057 		}
2058 		fragToBeJoined.removeOutAtom(out);
2059 
2060 		state.fragManager.createBond(from, atomToJoinTo, bondOrder);
2061 		if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");}
2062 	}
2063 
2064 	/**
2065 	 * Forms a bridge using the given fragment.
2066 	 * The bridgingFragment's outAtoms locants or a combination of the atomToJoinTo and a suitable atom
2067 	 * are used to decide what atoms to form the bridge between
2068 	 * @param state
2069 	 * @param bridgingFragment
2070 	 * @param atomToJoinTo
2071 	 * @return Atoms that the bridgingFragment attached to
2072 	 * @throws StructureBuildingException
2073 	 */
formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo)2074 	static Atom[] formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo) throws StructureBuildingException {
2075 		Fragment fragToJoinTo = atomToJoinTo.getFrag();
2076 		List<Atom> atomList = fragToJoinTo.getAtomList();
2077 		if (atomList.size()==1){
2078 			throw new StructureBuildingException("Epoxides must be formed between two different atoms");
2079 		}
2080 		Atom firstAtomToJoinTo;
2081 		if (bridgingFragment.getOutAtom(0).getLocant() != null){
2082 			firstAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant());
2083 		}
2084 		else{
2085 			firstAtomToJoinTo = atomToJoinTo;
2086 		}
2087 		OutAtom outAtom1 = bridgingFragment.getOutAtom(0);
2088 		bridgingFragment.removeOutAtom(0);
2089 
2090 		//In epoxy chalcogenAtom1 will be chalcogenAtom2. Methylenedioxy is also handled by this method
2091 		state.fragManager.createBond(outAtom1.getAtom(), firstAtomToJoinTo, outAtom1.getValency());
2092 
2093 		Atom secondAtomToJoinTo;
2094 		if (bridgingFragment.getOutAtom(0).getLocant() != null){
2095 			secondAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant());
2096 		}
2097 		else{
2098 			int index = atomList.indexOf(firstAtomToJoinTo);
2099 			Atom preferredAtom = (index + 1 >= atomList.size()) ? atomList.get(index - 1) : atomList.get(index + 1);
2100 			List<Atom> possibleSecondAtom = FragmentTools.findnAtomsForSubstitution(fragToJoinTo.getAtomList(), preferredAtom, 1, 1, true);
2101 			if (possibleSecondAtom != null) {
2102 				possibleSecondAtom.removeAll(Collections.singleton(firstAtomToJoinTo));
2103 			}
2104 			if (possibleSecondAtom == null || possibleSecondAtom.size() == 0) {
2105 				throw new StructureBuildingException("Unable to find suitable atom to form bridge");
2106 			}
2107 			if (AmbiguityChecker.isSubstitutionAmbiguous(possibleSecondAtom, 1)) {
2108 				state.addIsAmbiguous("Addition of bridge to: "+ fragToJoinTo.getTokenEl().getValue());
2109 			}
2110 			secondAtomToJoinTo = possibleSecondAtom.get(0);
2111 		}
2112 		OutAtom outAtom2 = bridgingFragment.getOutAtom(0);
2113 		bridgingFragment.removeOutAtom(0);
2114 		if (outAtom1.getAtom().equals(outAtom2.getAtom()) && firstAtomToJoinTo == secondAtomToJoinTo){
2115 			throw new StructureBuildingException("Epoxides must be formed between two different atoms");
2116 		}
2117 		int bondValency = outAtom2.getValency();
2118 		if (outAtom2.getAtom().hasSpareValency() && !secondAtomToJoinTo.hasSpareValency()) {
2119 			//bridging groups like azeno are treated as aromatic so that it is not fixed as to which of the two bonds is the double bond
2120 			//if connected to a saturated group though, one of them must be a double bond
2121 			bondValency = 2;
2122 		}
2123 		state.fragManager.createBond(outAtom2.getAtom(), secondAtomToJoinTo, bondValency);
2124 		CycleDetector.assignWhetherAtomsAreInCycles(bridgingFragment);
2125 		return new Atom[]{firstAtomToJoinTo, secondAtomToJoinTo};
2126 	}
2127 
2128 	/**
2129 	 * Attempts to find an in-scope fragment capable of forming the given numberOfSubstitutions each with the given bondOrder
2130 	 * @param subOrBracket
2131 	 * @param numberOfSubstitutions
2132 	 * @param bondOrder
2133 	 * @return
2134 	 */
findAtomsForSubstitution(Element subOrBracket, int numberOfSubstitutions, int bondOrder)2135 	private static List<Atom> findAtomsForSubstitution(Element subOrBracket, int numberOfSubstitutions, int bondOrder) {
2136 		FindAlternativeGroupsResult results = findAlternativeGroups(subOrBracket);
2137 		List<Atom> substitutableAtoms = findAtomsForSubstitution(results.groups, numberOfSubstitutions, bondOrder, true);
2138 		if (substitutableAtoms != null) {
2139 			return substitutableAtoms;
2140 		}
2141 		substitutableAtoms = findAtomsForSubstitution(results.groups, numberOfSubstitutions, bondOrder, false);
2142 		if (substitutableAtoms != null) {
2143 			return substitutableAtoms;
2144 		}
2145 		substitutableAtoms = findAtomsForSubstitution(results.groupsSubstitutionUnlikely, numberOfSubstitutions, bondOrder, true);
2146 		if (substitutableAtoms != null) {
2147 			return substitutableAtoms;
2148 		}
2149 		substitutableAtoms = findAtomsForSubstitution(results.groupsSubstitutionUnlikely, numberOfSubstitutions, bondOrder, false);
2150 		return substitutableAtoms;
2151 	}
2152 
findAtomsForSubstitution(List<Element> possibleParents, int numberOfSubstitutions, int bondOrder, boolean preserveValency)2153 	private static List<Atom> findAtomsForSubstitution(List<Element> possibleParents, int numberOfSubstitutions, int bondOrder, boolean preserveValency) {
2154 		boolean rootHandled = false;
2155 		for (int i = 0, l = possibleParents.size(); i < l; i++) {
2156 			Element possibleParent = possibleParents.get(i);
2157 			Fragment frag = possibleParent.getFrag();
2158 			List<Atom> substitutableAtoms;
2159 			if (possibleParent.getParent().getName().equals(ROOT_EL)){//consider all root groups as if they were one
2160 				if(rootHandled) {
2161 					continue;
2162 				}
2163 				List<Atom> atoms = frag.getAtomList();
2164 				for (int j = i + 1; j < l; j++) {
2165 					Element possibleOtherRoot = possibleParents.get(j);
2166 					if (possibleOtherRoot.getParent().getName().equals(ROOT_EL)) {
2167 						atoms.addAll(possibleOtherRoot.getFrag().getAtomList());
2168 					}
2169 				}
2170 				rootHandled = true;
2171 				substitutableAtoms = FragmentTools.findnAtomsForSubstitution(atoms, frag.getDefaultInAtom(), numberOfSubstitutions, bondOrder, true, preserveValency);
2172 			}
2173 			else{
2174 				substitutableAtoms = FragmentTools.findnAtomsForSubstitution(frag.getAtomList(), frag.getDefaultInAtom(), numberOfSubstitutions, bondOrder, true, preserveValency);
2175 			}
2176 			if (substitutableAtoms != null){
2177 				return substitutableAtoms;
2178 			}
2179 		}
2180 		return null;
2181 	}
2182 
2183 	/**
2184 	 * Finds all the fragments accessible from the startingElement taking into account brackets
2185 	 * i.e. those that it is feasible that the group of the startingElement could substitute onto
2186 	 * @param startingElement
2187 	 * @return A list of fragments in the order to try them as possible parent fragments (for substitutive operations)
2188 	 */
findAlternativeFragments(Element startingElement)2189 	static List<Fragment> findAlternativeFragments(Element startingElement) {
2190 		List<Fragment> foundFragments = new ArrayList<Fragment>();
2191 		FindAlternativeGroupsResult results = findAlternativeGroups(startingElement);
2192 		for (Element group : results.groups) {
2193 			foundFragments.add(group.getFrag());
2194 		}
2195 		for (Element group : results.groupsSubstitutionUnlikely) {
2196 			foundFragments.add(group.getFrag());
2197 		}
2198 		return foundFragments;
2199 	}
2200 
2201 	/**
2202 	 * Finds all the groups accessible from the startingElement taking into account brackets
2203 	 * i.e. those that it is feasible that the group of the startingElement could substitute onto
2204 	 * (locanting onto bracketted groups is unlikely so these are kept seperate in the results object)
2205 	 * @param startingElement
2206 	 * @return An object containing the groups in the order to try them as possible parent groups (for substitutive operations)
2207 	 */
findAlternativeGroups(Element startingElement)2208 	static FindAlternativeGroupsResult findAlternativeGroups(Element startingElement) {
2209 		Deque<AlternativeGroupFinderState> stack = new ArrayDeque<AlternativeGroupFinderState>();
2210 		stack.add(new AlternativeGroupFinderState(startingElement.getParent(), false));
2211 		List<Element> groups = new ArrayList<Element>();
2212 		List<Element> groupsSubstitutionUnlikely = new ArrayList<Element>();//locanting into brackets is rarely the desired answer so keep these separate
2213 		boolean doneFirstIteration = false;//check on index only done on first iteration to only get elements with an index greater than the starting element
2214 		while (stack.size() > 0) {
2215 			AlternativeGroupFinderState state = stack.removeLast();
2216 			Element currentElement = state.el;
2217 			boolean substitutionUnlikely = state.substitutionUnlikely;
2218 			if (currentElement.getName().equals(GROUP_EL)) {
2219 				if (substitutionUnlikely) {
2220 					groupsSubstitutionUnlikely.add(currentElement);
2221 				}
2222 				else {
2223 					groups.add(currentElement);
2224 				}
2225 				continue;
2226 			}
2227 			List<Element> siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
2228 
2229 			for (Element bracketOrSubOrRoot : siblings) {
2230 				if (!doneFirstIteration && currentElement.indexOf(bracketOrSubOrRoot) <= currentElement.indexOf(startingElement)){
2231 					continue;
2232 				}
2233 				if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){
2234 					continue;
2235 				}
2236 				boolean substitutionUnlikelyForThisEl = substitutionUnlikely;
2237 				if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)){
2238 					if (!IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR))) {
2239 						substitutionUnlikelyForThisEl = true;
2240 					}
2241 					stack.add(new AlternativeGroupFinderState(bracketOrSubOrRoot, substitutionUnlikelyForThisEl));
2242 				}
2243 				else{
2244 					if (bracketOrSubOrRoot.getAttribute(LOCANT_ATR) != null) {
2245 						substitutionUnlikelyForThisEl = true;
2246 					}
2247 					Element group = bracketOrSubOrRoot.getFirstChildElement(GROUP_EL);
2248 					stack.add(new AlternativeGroupFinderState(group, substitutionUnlikelyForThisEl));
2249 				}
2250 			}
2251 			doneFirstIteration = true;
2252 		}
2253 		return new FindAlternativeGroupsResult(groups, groupsSubstitutionUnlikely);
2254 	}
2255 
2256 	private static class AlternativeGroupFinderState {
2257 		private final Element el;
2258 		private final boolean substitutionUnlikely;
2259 
AlternativeGroupFinderState(Element el, boolean substitutionUnlikely)2260 		AlternativeGroupFinderState(Element el, boolean substitutionUnlikely) {
2261 			this.el = el;
2262 			this.substitutionUnlikely = substitutionUnlikely;
2263 		}
2264 	}
2265 
2266 	private static class FindAlternativeGroupsResult {
2267 		private final List<Element> groups;
2268 		private final List<Element> groupsSubstitutionUnlikely;
2269 
FindAlternativeGroupsResult(List<Element> groups, List<Element> groupsSubstitutionUnlikely)2270 		FindAlternativeGroupsResult(List<Element> groups, List<Element> groupsSubstitutionUnlikely) {
2271 			this.groups = groups;
2272 			this.groupsSubstitutionUnlikely = groupsSubstitutionUnlikely;
2273 		}
2274 	}
2275 
	/**
	 * Checks through the groups accessible from the startingElement taking into account brackets
	 * i.e. those that it is feasible that the group of the startingElement could substitute onto.
	 * The search uses a stack: elements added later are examined earlier, and brackets (other than
	 * implicit brackets without a locant, which are treated as transparent) are placed at the bottom of the
	 * stack so that they are only examined once the unbracketed candidates have been exhausted.
	 * A single-atom fragment matching the locant "1" is only returned as a fallback.
	 * @param startingElement The element whose later siblings (and their descendants) are searched
	 * @param locant the locant string to check for the presence of
	 * @return The fragment with the locant, or null
	 * @throws StructureBuildingException Declared by the signature; no statement in this method throws it directly
	 */
	private static Fragment findFragmentWithLocant(Element startingElement, String locant) throws StructureBuildingException {
		//used as a LIFO stack: add() appends to the tail and removeLast() takes from the tail
		Deque<Element> stack = new ArrayDeque<Element>();
		stack.add(startingElement.getParent());
		boolean doneFirstIteration = false;//check on index only done on first iteration to only get elements with an index greater than the starting element
		Fragment monoNuclearHydride = null;//e.g. methyl/methane - In this case no locant would be expected as unlocanted substitution is always unambiguous. Hence deprioritise
		while (stack.size() > 0) {
			Element currentElement = stack.removeLast();
			if (currentElement.getName().equals(SUBSTITUENT_EL) || currentElement.getName().equals(ROOT_EL)) {
				//a leaf of the search: test the fragment of this substituent/root's first group
				Fragment groupFrag = currentElement.getFirstChildElement(GROUP_EL).getFrag();
				if (monoNuclearHydride != null && currentElement.getAttribute(LOCANT_ATR) != null) {//It looks like all groups are locanting onto the monoNuclearHydride e.g. 1-oxo-1-phenyl-sulfanylidene
					return monoNuclearHydride;
				}
				if (groupFrag.hasLocant(locant)) {
					if (locant.equals("1") && groupFrag.getAtomCount() == 1) {
						//remember only the first single-atom match and keep looking for a better candidate
						if (monoNuclearHydride == null) {
							monoNuclearHydride = groupFrag;
						}
					}
					else{
						return groupFrag;
					}
				}
				continue;
			}
			else if (monoNuclearHydride != null) {
				//about to descend into a container (i.e. not a substituent/root): settle for the remembered single-atom fragment
				return monoNuclearHydride;
			}
			List<Element> siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});

			//brackets found while expanding currentElement; these are queued separately below
			List<Element> bracketted = new ArrayList<Element>();
			//NOTE(review): the getAttribute(LOCANT_EL) calls below pass an element-name constant where an attribute name is expected
			//(LOCANT_ATR is used further up) - presumably both constants hold the same string; confirm against XmlDeclarations
			if (!doneFirstIteration) {//on the first iteration, ignore elements before the starting element and favour the element directly after the starting element (conditions apply)
				int indexOfStartingEl = currentElement.indexOf(startingElement);
				Element substituentToTryFirst = null;
				for (Element bracketOrSubOrRoot : siblings) {
					int indexOfCurrentEl = currentElement.indexOf(bracketOrSubOrRoot);
					if (indexOfCurrentEl <= indexOfStartingEl) {
						continue;
					}
					if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null) {
						continue;
					}

					if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)) {
						if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR)) && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null) {
							//treat implicit brackets without locants as if they are not there
							for (Element descendent : getChildrenIgnoringLocantlessImplicitBrackets(bracketOrSubOrRoot)) {
								if (descendent.getName().equals(BRACKET_EL)) {
									bracketted.add(descendent);
								}
								else {
									//the first unlocanted candidate is held back (only when the locant matches MATCH_NUMERIC_LOCANT) so it can be pushed last
									if (substituentToTryFirst == null && descendent.getAttribute(LOCANT_EL) == null && MATCH_NUMERIC_LOCANT.matcher(locant).matches()) {
										substituentToTryFirst = descendent;
									}
									else {
										stack.add(descendent);
									}
								}
							}
						}
						else {
							bracketted.add(bracketOrSubOrRoot);
						}
					}
					else {
						//same "try first" rule as for the contents of implicit brackets above
						if (substituentToTryFirst == null && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null && MATCH_NUMERIC_LOCANT.matcher(locant).matches()) {
							substituentToTryFirst = bracketOrSubOrRoot;
						}
						else {
							stack.add(bracketOrSubOrRoot);
						}
					}
				}
				if (substituentToTryFirst != null) {
					//pushed after everything else so that it is the next element taken from the stack
					stack.add(substituentToTryFirst);
				}
				doneFirstIteration = true;
			}
			else {
				//later iterations: every non-multiplied child is a candidate, with no index filter and no "try first" element
				for (Element bracketOrSubOrRoot : siblings) {
					if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null) {
						continue;
					}
					if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)) {
						if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR)) && bracketOrSubOrRoot.getAttribute(LOCANT_EL) == null) {
							//treat implicit brackets without locants as if they are not there
							for (Element descendent : getChildrenIgnoringLocantlessImplicitBrackets(bracketOrSubOrRoot)) {
								if (descendent.getName().equals(BRACKET_EL)) {
									bracketted.add(descendent);
								}
								else {
									stack.add(descendent);
								}
							}
						}
						else {
							bracketted.add(bracketOrSubOrRoot);
						}
					}
					else {
						stack.add(bracketOrSubOrRoot);
					}
				}
			}
			//locanting into brackets is rarely the desired answer so place at the bottom of the stack
			//(iterating in reverse with addFirst leaves the first bracket in the list at the very bottom)
			for (int i = bracketted.size() -1; i >=0; i--) {
				stack.addFirst(bracketted.get(i));
			}
		}
		//nothing better was found: either the remembered single-atom fragment or null
		return monoNuclearHydride;
	}
2394 
getChildrenIgnoringLocantlessImplicitBrackets(Element implicitBracket)2395 	private static List<Element> getChildrenIgnoringLocantlessImplicitBrackets(Element implicitBracket) {
2396 		List<Element> childrenAndImplicitBracketChildren = new ArrayList<Element>();
2397 		for (Element child : implicitBracket.getChildElements()) {
2398 			if (child.getName().equals(BRACKET_EL) && IMPLICIT_TYPE_VAL.equals(child.getAttributeValue(TYPE_ATR)) && child.getAttribute(LOCANT_EL) == null) {
2399 				childrenAndImplicitBracketChildren.addAll(getChildrenIgnoringLocantlessImplicitBrackets(child));
2400 			}
2401 			else {
2402 				childrenAndImplicitBracketChildren.add(child);
2403 			}
2404 		}
2405 		return childrenAndImplicitBracketChildren;
2406 	}
2407 
findRightMostGroupInBracket(Element bracket)2408 	static Element findRightMostGroupInBracket(Element bracket) {
2409 		List<Element> subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(bracket, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
2410 		Element lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1);
2411 		while (lastSubsBracketOrRoot.getName().equals(BRACKET_EL)) {
2412 			subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(lastSubsBracketOrRoot, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL});
2413 			lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1);
2414 		}
2415 		return findRightMostGroupInSubOrRoot(lastSubsBracketOrRoot);
2416 	}
2417 
findRightMostGroupInSubBracketOrRoot(Element subBracketOrRoot)2418 	static Element findRightMostGroupInSubBracketOrRoot(Element subBracketOrRoot) {
2419 		if (subBracketOrRoot.getName().equals(BRACKET_EL)) {
2420 			return findRightMostGroupInBracket(subBracketOrRoot);
2421 		}
2422 		else {
2423 			return findRightMostGroupInSubOrRoot(subBracketOrRoot);
2424 		}
2425 	}
2426 
findRightMostGroupInSubOrRoot(Element subOrRoot)2427 	private static Element findRightMostGroupInSubOrRoot(Element subOrRoot) {
2428 		for (int i = subOrRoot.getChildCount() - 1; i >= 0; i--) {
2429 			Element el = subOrRoot.getChild(i);
2430 			if (el.getName().equals(GROUP_EL)) {
2431 				return el;
2432 			}
2433 		}
2434 		return null;
2435 	}
2436 
potentiallyCanSubstitute(Element subBracketOrRoot)2437 	private static boolean potentiallyCanSubstitute(Element subBracketOrRoot) {
2438 		Element parent = subBracketOrRoot.getParent();
2439 		List<Element> children =parent.getChildElements();
2440 		for (int i = parent.indexOf(subBracketOrRoot) +1 ; i < children.size(); i++) {
2441 			if (!children.get(i).getName().equals(HYPHEN_EL)){
2442 				return true;
2443 			}
2444 		}
2445 		return false;
2446 	}
2447 
checkForBracketedPrimedLocantSpecialCase(Element subBracketOrRoot, String locantString)2448 	static String checkForBracketedPrimedLocantSpecialCase(Element subBracketOrRoot, String locantString) {
2449 		int terminalPrimes = StringTools.countTerminalPrimes(locantString);
2450 		if (terminalPrimes > 0){
2451 			int brackettingDepth = 0;
2452 			Element parent = subBracketOrRoot.getParent();
2453 			while (parent != null && parent.getName().equals(BRACKET_EL)){
2454 				if (!IMPLICIT_TYPE_VAL.equals(parent.getAttributeValue(TYPE_ATR))){
2455 					brackettingDepth++;
2456 				}
2457 				parent = parent.getParent();
2458 			}
2459 			if (terminalPrimes == brackettingDepth){
2460 				return locantString.substring(0, locantString.length() - terminalPrimes);
2461 			}
2462 		}
2463 		return null;
2464 	}
2465 
2466 	/**
2467 	 * In cases such as methylenecyclohexane two outAtoms are combined to form a single outAtom with valency
2468 	 * equal to sum of the valency of the other outAtoms.
2469 	 * This is only allowed on substituents where all the outAtoms are on the same atom
2470 	 * @param frag
2471 	 * @param group
2472 	 * @throws StructureBuildingException
2473 	 */
checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(Fragment frag, Element group)2474 	private static void checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(Fragment frag, Element group) throws StructureBuildingException {
2475 		int outAtomCount = frag.getOutAtomCount();
2476 		if (outAtomCount <= 1) {
2477 			return;
2478 		}
2479 		if (EPOXYLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){
2480 			return;
2481 		}
2482 		String groupValue = group.getValue();
2483 		if (groupValue.equals("oxy") || groupValue.equals("thio") || groupValue.equals("seleno") || groupValue.equals("telluro")){//always bivalent
2484 			return;
2485 		}
2486 		//special case: all outAtoms on same atom e.g. methylenecyclohexane
2487 		Atom firstOutAtom = frag.getOutAtom(0).getAtom();
2488 		int valencyOfOutAtom = 0;
2489 		for (int i = outAtomCount - 1; i >=0 ; i--) {//remove all outAtoms and add one with the total valency of all those that have been removed
2490 			OutAtom out = frag.getOutAtom(i);
2491 			if (!out.getAtom().equals(firstOutAtom)){
2492 				throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had: "+ outAtomCount);
2493 			}
2494 			valencyOfOutAtom += out.getValency();
2495 			frag.removeOutAtom(i);
2496 		}
2497 		frag.addOutAtom(firstOutAtom, valencyOfOutAtom, true);
2498 	}
2499 
2500 	/**
2501 	 * Calculates the number of substitutable hydrogen by taking into account:
2502 	 * Specified valency if applicable, outAtoms and the lowest valency state that will satisfy these
2503 	 * e.g. thio has 2 outAtoms and no bonds hence -->2 outgoing, lowest stable valency = 2 hence no substitutable hydrogen
2504 	 * e.g. phosphonyl has 2 outAtoms and one double bond -->4 outgoing, lowest stable valency =5 hence 1 substitutable hydrogen
2505 	 * @param atom
2506 	 * @return
2507 	 */
calculateSubstitutableHydrogenAtoms(Atom atom)2508 	static int calculateSubstitutableHydrogenAtoms(Atom atom) {
2509 		if (!atom.getImplicitHydrogenAllowed()) {
2510 			return 0;
2511 		}
2512 		int valency = atom.determineValency(true);
2513 		int currentValency = atom.getIncomingValency() + atom.getOutValency();
2514 		int substitutableHydrogen = valency - currentValency;
2515 		return substitutableHydrogen >= 0 ? substitutableHydrogen : 0;
2516 	}
2517 
2518 	/**
2519 	 * Stereochemistry terms are assigned right at the end so that checks can be done on whether the indicated atom is in fact chiral.
2520 	 * In the process of multiplication locants are primed. This function adds the appropriate number of primes to any locanted stereochemistry locants
2521 	 * The primesString is the string containing the primes to add to each locant
2522 	 * @param subOrBracket
2523 	 * @param primesString
2524 	 */
addPrimesToLocantedStereochemistryElements(Element subOrBracket, String primesString)2525 	private static void addPrimesToLocantedStereochemistryElements(Element subOrBracket, String primesString) {
2526 		List<Element> stereoChemistryElements =OpsinTools.getDescendantElementsWithTagName(subOrBracket, STEREOCHEMISTRY_EL);
2527 		for (Element stereoChemistryElement : stereoChemistryElements) {
2528 			if (stereoChemistryElement.getAttribute(LOCANT_ATR) != null){
2529 				stereoChemistryElement.getAttribute(LOCANT_ATR).setValue(stereoChemistryElement.getAttributeValue(LOCANT_ATR) + primesString);
2530 			}
2531 		}
2532 	}
2533 
2534 	/**
2535 	 * Calculates the number of times getParent() must be called to reach a word element
2536 	 * Returns null if element does not have an enclosing word element.
2537 	 * @param element
2538 	 * @return
2539 	 */
levelsToWordEl(Element element)2540 	private static Integer levelsToWordEl(Element element) {
2541 		int count =0;
2542 		while (!element.getName().equals(WORD_EL)){
2543 			element = element.getParent();
2544 			if (element == null){
2545 				return null;
2546 			}
2547 			count++;
2548 		}
2549 		return count;
2550 	}
2551 }
2552