1 package org.broadinstitute.hellbender.testutils;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
5 import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.MultiDeBruijnVertex;
6 import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph;
7 import org.broadinstitute.hellbender.utils.Utils;
8 
9 import java.util.*;
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12 
13 public final class TestingReadThreadingGraph extends ReadThreadingGraph {
14     /*************************************************************
15      * Simple string representation support for testing purposes *
16      *************************************************************/
17 
18     private static final Pattern PROPERTIES_PATTERN = Pattern.compile("^\\s*\\[[^\\]]*\\]");
19     private static final Pattern PATH_PATTERN = Pattern.compile("\\{((\\S+):)?([^\\}]*)\\}");
20     private static final Pattern KMERSIZE_EXTRACTOR_PATTERN = Pattern.compile("^\\s*\\[[^\\]]*(ks|kmerSize)\\s*=\\s*(\\d+)\\s*[,\\]]");
21     private static final long serialVersionUID = 1l;
22 
23 
24     /**
25      * Constructs a read-threading-graph for a string representation.
26      *
27      * <p>
28      *     Note: only used for testing.
29      *     Checkout {@see HaplotypeGraphUnitTest} for examples.
30      * </p>
31      * @param s the string representation of the graph {@code null}.
32      */
TestingReadThreadingGraph(final String s)33     public TestingReadThreadingGraph(final String s) {
34         super(kmerSizeFromString(s));
35         applyString(s);
36         setAlreadyBuilt();
37     }
38 
39     /**
40      * Obtain the kmer size for the string representation.
41      * @param str the source string representation.
42      * @return 1 or greater.
43      * @throws IllegalArgumentException if {@code} str does not contain a valid representation.
44      */
kmerSizeFromString(final String str)45     private static int kmerSizeFromString(final String str) {
46         final Matcher matcher = KMERSIZE_EXTRACTOR_PATTERN.matcher(str);
47         if (matcher.find()) {
48             return Integer.parseInt(matcher.group(2));
49         } else {
50             throw new IllegalArgumentException("the input graph spec does not indicate the kmerSize");
51         }
52     }
53 
54     /**
55      * Apply description string into the graph.
56      *
57      * <p>
58      *     Note: this is done just for testing purposes.
59      *     Checkout {@see HaplotypeGraphUnitTest} for examples.
60      * </p>
61      * @param str the string representation.
62      */
applyString(final String str)63     private void applyString(final String str) {
64         final Matcher propertiesSectionMatcher = PROPERTIES_PATTERN.matcher(str);
65         final int pathStart = propertiesSectionMatcher.find() ? propertiesSectionMatcher.end() : 0;
66 
67         final String pathString = str.substring(pathStart);
68         final Matcher pathMatcher = PATH_PATTERN.matcher(pathString);
69 
70         boolean referenceFound = false;
71         final Map<String, MultiDeBruijnVertex> vertexById = new HashMap<>();
72 
73         // Loop between path strings and add them one by one.
74         while (pathMatcher.find()) {
75             final String label = pathMatcher.group(2);
76             final boolean isReference = "REF".equals(label);
77             if (referenceFound) {
78                 Utils.validateArg(!isReference, "there are two reference paths");
79             } else if ( isReference ) {
80                 referenceFound = true;
81             }
82 
83             // Divide each path into its elements getting a list of sequences and labels if applies:
84             final String elementsString = pathMatcher.group(3);
85             final String[] elements = elementsString.split("\\s*->\\s*");
86             Utils.validateArg(elements.length > 0, "empty path not allowed");
87             final String[] seqs = new String[elements.length];
88             final String[] ids = new String[elements.length];
89             for (int i = 0; i < elements.length; i++) {
90                 ids[i] = pathElementId(elements[i]);
91                 seqs[i] = pathElementSeq(elements[i]);
92                 Utils.validateArg(!(seqs[i].isEmpty() && ids[i] == null), "path with empty element without an id");
93             }
94             final boolean isSource =  ids[0] == null || !vertexById.containsKey(ids[0]);
95             if (isSource && seqs[0].length() != kmerSize) {
96                 throw new IllegalArgumentException("source sequence length must be the same as the kmerSize "
97                         + ids[0] + ' ' + seqs[0] + ' ' + pathMatcher.group());
98             }
99             final MultiDeBruijnVertex firstVertex;
100             if (ids[0] != null && vertexById.containsKey(ids[0])) {
101                 firstVertex = vertexById.get(ids[0]);
102             } else {
103                 firstVertex = new MultiDeBruijnVertex(seqs[0].getBytes());
104                 addVertex(firstVertex);
105                 if (ids[0] != null) {
106                     vertexById.put(ids[0], firstVertex);
107                 }
108             }
109             if (!seqs[0].isEmpty() &&
110                     ((isSource && !firstVertex.getSequenceString().equals(seqs[0]))
111                             || (!isSource && firstVertex.getSuffix() != seqs[0].getBytes()[0]))) {
112                 throw new IllegalArgumentException("mismatched first element sequence");
113             }
114 
115             MultiDeBruijnVertex lastVertex = firstVertex;
116             for (int i = 1; i < elements.length; i++) {
117                 //TODO: code and comment disagree
118                 Utils.validateArg(seqs[i].length() <= 1, "non-source vertex sequence must have length 1");
119                 final MultiDeBruijnVertex nextVertex;
120                 if (ids[i] == null || !vertexById.containsKey(ids[i])) {
121                     final Set<MultiDeBruijnVertex> nextVertices = getNextVertices(lastVertex,seqs[i].getBytes()[0]);
122                     if (nextVertices.isEmpty()) {
123                         nextVertex = new MultiDeBruijnVertex(extendSequence(lastVertex.getSequence(),seqs[i].getBytes()[0]));
124                         addVertex(nextVertex);
125                     } else {
126                         nextVertex = nextVertices.iterator().next();
127                     }
128                     if (ids[i] != null) {
129                         vertexById.put(ids[i], nextVertex);
130                     }
131                 } else {
132                     nextVertex = vertexById.get(ids[i]);
133                 }
134                 final MultiSampleEdge edge = addEdge(lastVertex,nextVertex);
135                 if (isReference) {
136                     edge.setIsRef(true);
137                 }
138                 lastVertex = nextVertex;
139             }
140         }
141     }
142 
143     /**
144      * Return the collection of outgoing vertices that expand this vertex with a particular base.
145      *
146      * @param v original vertex.
147      * @param b expanding base.
148      * @return never null, but perhaps an empty set. You cannot assume that you can modify the result.
149      */
150     @VisibleForTesting
getNextVertices(final MultiDeBruijnVertex v, final byte b)151     Set<MultiDeBruijnVertex> getNextVertices(final MultiDeBruijnVertex v, final byte b) {
152         Utils.nonNull(v, "the input vertex cannot be null");
153         Utils.validateArg(vertexSet().contains(v), "the vertex must be present in the graph");
154         final List<MultiDeBruijnVertex> result = new LinkedList<>();
155         for (final MultiDeBruijnVertex w : outgoingVerticesOf(v)) {
156             if (w.getSuffix() == b) {
157                 result.add(w);
158             }
159         }
160         switch (result.size()) {
161             case 0: return Collections.emptySet();
162             case 1: return Collections.singleton(result.get(0));
163             default:
164                 return new HashSet<>(result);
165         }
166     }
167 
pathElementId(final String element)168     private static String pathElementId(final String element) {
169         final int openBracketPosition = element.indexOf('(');
170 
171         if (openBracketPosition == -1) {
172             return null;
173         }
174 
175         final int closeBracketPosition = element.lastIndexOf(')');
176         Utils.validateArg(closeBracketPosition != -1, () -> "non-closed id parantesys found in element: " + element);
177         final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim();
178         Utils.validateArg(!result.isEmpty(), () -> "empty id found in element: " + element);
179         return result;
180     }
181 
182     /**
183      * Returns the lenght of a path element in the string representation.
184      * @param element the query element.
185      * @return 0 or greater.
186      */
pathElementSeq(final String element)187     private static String pathElementSeq(final String element) {
188         final int parentesysPos = element.indexOf('(');
189 
190         if (parentesysPos == -1) {
191             return element.trim();
192         }
193 
194         return element.substring(0,parentesysPos).trim();
195     }
196 
197     /**
198      * Add a base to the end of a byte sequence.
199      * @param sequence sequence where to add the base to.
200      * @param b base to add.
201      * @return never {@code null}, a new array each time.
202      */
extendSequence(final byte[] sequence, final byte b)203     private static byte[] extendSequence(final byte[] sequence, final byte b) {
204         final byte[] result = new byte[sequence.length];
205         System.arraycopy(sequence, 1, result, 0, sequence.length - 1);
206         result[result.length - 1] = b;
207         return result;
208     }
209 
210     @Override
clone()211     public TestingReadThreadingGraph clone() {
212         return (TestingReadThreadingGraph) super.clone();
213     }
214 }
215