1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
34 #define SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
35
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38
39 template <typename TSpec = Default>
40 struct WordGraph;
41
42 //////////////////////////////////////////////////////////////////////////////
43 // Graph - WordGraph
44 //////////////////////////////////////////////////////////////////////////////
45
46 //////////////////////////////////////////////////////////////////////////////
47
48 /*!
49 * @class WordGraph
50 * @extends Automaton
51 * @headerfile <seqan/graph_type.h>
52 * @brief A special automaton that stores words instead of single characters along its edges.
53 *
54 * @signature template <[typename TAlphabet[, typename Spec]]>
55 * class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> >;
56 *
57 * @tparam TAlphabet The alphabet type.
58 * @tparam TSpec The specializing types.
59 */
60
61 template<typename TAlphabet, typename TSpec>
62 class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >
63 {
64 public:
65 typedef typename VertexIdHandler<Graph>::Type TVertexIdManager_;
66 typedef typename EdgeIdHandler<Graph>::Type TEdgeIdManager_;
67 typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_;
68 typedef typename EdgeType<Graph>::Type TEdge_;
69
70 String<AutomatonEdgeArray<TEdge_, TAlphabet> > data_vertex; // List of tables
71 TVertexIdManager_ data_id_managerV;
72 TEdgeIdManager_ data_id_managerE;
73 TVertexDescriptor_ data_root;
74
75
76 //____________________________________________________________________________
77
78
Graph()79 Graph() : data_root(0) {
80 SEQAN_CHECKPOINT
81 }
82
83
~Graph()84 ~Graph() {
85 SEQAN_CHECKPOINT
86 clear(*this);
87 }
88
Graph(Graph const & _other)89 Graph(Graph const & _other)
90 {
91 SEQAN_CHECKPOINT
92 _copyGraph(_other, *this);
93 }
94
95 Graph const& operator = (Graph const & _other) {
96 SEQAN_CHECKPOINT
97 if (this == &_other) return *this;
98 _copyGraph(_other, *this);
99 return *this;
100 }
101 };
102
103
104 //////////////////////////////////////////////////////////////////////////////
105
106 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
107 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)108 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
109 TVertexDescriptor const source,
110 TVertexDescriptor const target,
111 String<TAlphabet> const & label)
112 {
113 SEQAN_CHECKPOINT;
114 SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
115 SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
116
117 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
118 typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
119 typedef typename Id<TGraph>::Type TId;
120
121 TAlphabet firstChar = getValue(label, 0);
122 TEdgeDescriptor e = findEdge(g, source, firstChar);
123 TId id = obtainId(g.data_id_managerE);
124 _assignId(e, id);
125 assignTarget(e, target);
126 String<TAlphabet> suf(suffix(label,1));
127 assignCargo(e, suf);
128 return e;
129 }
130
131 //////////////////////////////////////////////////////////////////////////////
132
133 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TChars>
134 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,TChars const * chars)135 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
136 TVertexDescriptor const source,
137 TVertexDescriptor const target,
138 TChars const* chars)
139 {
140 SEQAN_CHECKPOINT
141 return addEdge(g,source,target,String<TAlphabet>(chars));
142 }
143
144 //////////////////////////////////////////////////////////////////////////////
145
146 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TLabel, typename TEdgeCargo>
147 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> &,TVertexDescriptor const,TVertexDescriptor const,TLabel const,TEdgeCargo const)148 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& /*g*/,
149 TVertexDescriptor const /*source*/,
150 TVertexDescriptor const /*target*/,
151 TLabel const /*label*/,
152 TEdgeCargo const /*cargo*/)
153 {
154 // No additional cargo allowed. Cargo is used for the words in the graph.
155 // Use external property map.
156 SEQAN_ASSERT_FAIL("No additional cargo allowed. Cargo is used for the words in the graph. Use external property map.");
157 }
158
159 //////////////////////////////////////////////////////////////////////////////
160
161 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
162 inline void
removeEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)163 removeEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
164 TVertexDescriptor const source,
165 TVertexDescriptor const target,
166 String<TAlphabet> const& label)
167 {
168 SEQAN_CHECKPOINT;
169 (void)target; // In case it is compiled without assertions.
170 SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
171 SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
172
173 TAlphabet firstChar = getValue(label, 0);
174 removeEdge(g, findEdge(g,source, firstChar));
175 }
176
177 //////////////////////////////////////////////////////////////////////////////
178
179 template<typename TFile, typename TAlphabet, typename TCargo, typename TSpec>
180 inline void
write(TFile & target,Graph<Automaton<TAlphabet,TCargo,WordGraph<TSpec>>> const & g)181 write(TFile & target,
182 Graph<Automaton<TAlphabet, TCargo, WordGraph<TSpec> > > const& g)
183 {
184 //IOREV _nodoc_
185 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
186 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
187 typedef typename EdgeType<TGraph>::Type TEdge;
188 typedef typename Size<TAlphabet>::Type TSize;
189 TSize table_length = ValueSize<TAlphabet>::VALUE;
190 TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
191
192 write(target, "WordGraph - Directed:\n");
193 typedef typename Iterator<String<AutomatonEdgeArray<TEdge, TAlphabet> > const, Rooted>::Type TIterConst;
194 for(TIterConst it = begin(g.data_vertex);!atEnd(it);goNext(it)) {
195 if (!idInUse(g.data_id_managerV, position(it))) continue;
196 TVertexDescriptor sourceVertex = position(it);
197 for(TSize i=0;i<table_length;++i) {
198 TEdge const* ed = &g.data_vertex[sourceVertex].data_edge[i];
199 if (getTarget(ed) == nilVal) continue;
200 appendNumber(target, (int)sourceVertex);
201 write(target, "->");
202 appendNumber(target, (int)getTarget(ed));
203 writeValue(target, ' ');
204 writeValue(target, ' ');
205 write(target, "Label: ");
206 writeValue(target, TAlphabet(i));
207 write(target, CharString(getCargo(ed)));
208 writeValue(target, '\n');
209 }
210 }
211 }
212
213 //////////////////////////////////////////////////////////////////////////////
214
215 /*!
216 * @fn WordGraph#getSuccessor
217 * @brief Gets the successor for a given vertex and an edge label.
218 *
219 * @signature TVertexDescriptor getSuccessor(a, v, str);
220 *
221 * @param[in] a The WordGraph to query for its successor.
222 * @param[in] v The descriptor fo the vertex to get the successor for.
223 * @param[in] str The label.
224 *
225 * @return TVertexDescriptor A vertex descriptor or nil if successor is not defined.
226 *
227 * @see WordGraph#parseString
228 * @see Graph#getNil
229 */
230
231 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
232 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const & chars)233 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
234 TVertexDescriptor vertex,
235 TCharacters const& chars)
236 {
237 SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
238 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
239 typedef typename EdgeType<TGraph>::Type TEdgeStump;
240 TEdgeStump* ed = findEdge(g, vertex, getValue(chars, 0));
241 if (getCargo(ed) == suffix(chars, 1)) {
242 return getTarget(ed);
243 } else {
244 return getNil<TVertexDescriptor>();
245 }
246 }
247
248 //////////////////////////////////////////////////////////////////////////////
249
250 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
251 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const * chars)252 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
253 TVertexDescriptor vertex,
254 TCharacters const* chars)
255 {
256 SEQAN_CHECKPOINT
257 return getSuccessor(g,vertex,String<TAlphabet>(chars));
258 }
259
260 //////////////////////////////////////////////////////////////////////////////
261
262 /*!
263 * @fn WordGraph#parseString
264 * @brief Parses a string one character at a time and moves accordingly in the WordGraph.
265 *
266 * @signature TVertexDescriptor parseString(a, v, beginIt, endIt);
267 * @signature TVertexDescriptor parseString(a, v, str);
268 *
269 * @param[in] a An WordGraph.
270 * @param[in] v The descriptor of the vertex to start at.
271 * @param[in] str The @link ContainerConcept @endlink to parse.
272 * @param[in,out] beginIt Begin iterator to sequence to parse. Set to the first character that could not be parsed
273 * or to the value of endIt if all of the string was parsed.
274 * @param[in] endIt End iterator to sequence to parse.
275 *
276 * @return TVertexDescriptor The vertex descriptor of the state that was reached after parsing.
277 *
278 * The parsing stops before @link WordGraph#getSuccessor @endlink reaches the nil state or if the complete sequence is
279 * read.
280 */
281
282 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TIterator>
283 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
parseString(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor const vertex,TIterator beginIt,TIterator endIt)284 parseString(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
285 TVertexDescriptor const vertex,
286 TIterator beginIt,
287 TIterator endIt)
288 {
289 SEQAN_CHECKPOINT;
290 SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
291 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
292 typedef typename Size<TGraph>::Type TSize;
293 TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
294 TVertexDescriptor succ = vertex;
295 while (beginIt!=endIt) {
296 String<TAlphabet> label(*beginIt);
297 TSize range = 1;
298 TVertexDescriptor tmp = getSuccessor(g,succ,label);
299 while ((tmp == nilVal) &&
300 (beginIt+range != endIt))
301 {
302 appendValue(label, *(beginIt + range));
303 tmp = getSuccessor(g,succ,label);
304 ++range;
305 }
306 if (tmp == nilVal) break;
307 succ = tmp;
308 beginIt = beginIt+range;
309 }
310 return succ;
311 }
312
313 /*!
314 * @fn WordGraph#canParseString
315 * @brief Test whether an WordGraph can parse a string completely.
316 *
317 * @signature bool canParseString(a[, v], str);
318 *
319 * @param[in] a The WordGraph to use for parsing.
320 * @param[in] v Optionally, the descriptor of the vertex to start at. Defaults to the root.
321 * @param[in] str The string to parse.
322 *
323 * @return bool <tt>true</tt> if the WordGraph parses <tt>str</tt> , starting at <tt>v</tt>, completely and
324 * <tt>false</tt> otherwise.
325 *
326 * @section Remarks
327 *
328 * This has not implemented yet.
329 */
330
331 // TODO(holtgrew): Not implemented yet!
332
333
334 }// namespace SEQAN_NAMESPACE_MAIN
335
336 #endif //#ifndef SEQAN_HEADER_...
337