1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
34 #define SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
35
36 namespace seqan
37 {
38
39 template <typename TSpec = Default>
40 struct WordGraph;
41
42 //////////////////////////////////////////////////////////////////////////////
43 // Graph - WordGraph
44 //////////////////////////////////////////////////////////////////////////////
45
46 //////////////////////////////////////////////////////////////////////////////
47
48 /*!
49 * @class WordGraph
50 * @extends Automaton
51 * @headerfile <seqan/graph_type.h>
52 * @brief A special automaton that stores words instead of single characters along its edges.
53 *
54 * @signature template <[typename TAlphabet[, typename Spec]]>
55 * class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> >;
56 *
57 * @tparam TAlphabet The alphabet type.
58 * @tparam TSpec The specializing types.
59 */
60
61 template<typename TAlphabet, typename TSpec>
62 class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >
63 {
64 public:
65 typedef typename VertexIdHandler<Graph>::Type TVertexIdManager_;
66 typedef typename EdgeIdHandler<Graph>::Type TEdgeIdManager_;
67 typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_;
68 typedef typename EdgeType<Graph>::Type TEdge_;
69
70 String<AutomatonEdgeArray<TEdge_, TAlphabet> > data_vertex; // List of tables
71 TVertexIdManager_ data_id_managerV;
72 TEdgeIdManager_ data_id_managerE;
73 TVertexDescriptor_ data_root;
74
75
76 //____________________________________________________________________________
77
78
Graph()79 Graph() : data_root(0) {
80 }
81
82
~Graph()83 ~Graph() {
84 clear(*this);
85 }
86
Graph(Graph const & _other)87 Graph(Graph const & _other)
88 {
89 _copyGraph(_other, *this);
90 }
91
92 Graph const& operator = (Graph const & _other) {
93 if (this == &_other) return *this;
94 _copyGraph(_other, *this);
95 return *this;
96 }
97 };
98
99
100 //////////////////////////////////////////////////////////////////////////////
101
102 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
103 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)104 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
105 TVertexDescriptor const source,
106 TVertexDescriptor const target,
107 String<TAlphabet> const & label)
108 {
109 SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
110 SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
111
112 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
113 typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
114 typedef typename Id<TGraph>::Type TId;
115
116 TAlphabet firstChar = getValue(label, 0);
117 TEdgeDescriptor e = findEdge(g, source, firstChar);
118 TId id = obtainId(g.data_id_managerE);
119 _assignId(e, id);
120 assignTarget(e, target);
121 String<TAlphabet> suf(suffix(label,1));
122 assignCargo(e, suf);
123 return e;
124 }
125
126 //////////////////////////////////////////////////////////////////////////////
127
128 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TChars>
129 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,TChars const * chars)130 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
131 TVertexDescriptor const source,
132 TVertexDescriptor const target,
133 TChars const* chars)
134 {
135 return addEdge(g,source,target,String<TAlphabet>(chars));
136 }
137
138 //////////////////////////////////////////////////////////////////////////////
139
140 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TLabel, typename TEdgeCargo>
141 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> &,TVertexDescriptor const,TVertexDescriptor const,TLabel const,TEdgeCargo const)142 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& /*g*/,
143 TVertexDescriptor const /*source*/,
144 TVertexDescriptor const /*target*/,
145 TLabel const /*label*/,
146 TEdgeCargo const /*cargo*/)
147 {
148 // No additional cargo allowed. Cargo is used for the words in the graph.
149 // Use external property map.
150 SEQAN_ASSERT_FAIL("No additional cargo allowed. Cargo is used for the words in the graph. Use external property map.");
151 }
152
153 //////////////////////////////////////////////////////////////////////////////
154
155 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
156 inline void
removeEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)157 removeEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
158 TVertexDescriptor const source,
159 TVertexDescriptor const target,
160 String<TAlphabet> const& label)
161 {
162 (void)target; // In case it is compiled without assertions.
163 SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
164 SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
165
166 TAlphabet firstChar = getValue(label, 0);
167 removeEdge(g, findEdge(g,source, firstChar));
168 }
169
170 //////////////////////////////////////////////////////////////////////////////
171
172 template<typename TFile, typename TAlphabet, typename TCargo, typename TSpec>
173 inline void
write(TFile & target,Graph<Automaton<TAlphabet,TCargo,WordGraph<TSpec>>> const & g)174 write(TFile & target,
175 Graph<Automaton<TAlphabet, TCargo, WordGraph<TSpec> > > const& g)
176 {
177 //IOREV _nodoc_
178 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
179 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
180 typedef typename EdgeType<TGraph>::Type TEdge;
181 typedef typename Size<TAlphabet>::Type TSize;
182 TSize table_length = ValueSize<TAlphabet>::VALUE;
183 TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
184
185 write(target, "WordGraph - Directed:\n");
186 typedef typename Iterator<String<AutomatonEdgeArray<TEdge, TAlphabet> > const, Rooted>::Type TIterConst;
187 for(TIterConst it = begin(g.data_vertex);!atEnd(it);goNext(it)) {
188 if (!idInUse(g.data_id_managerV, position(it))) continue;
189 TVertexDescriptor sourceVertex = position(it);
190 for(TSize i=0;i<table_length;++i) {
191 TEdge const* ed = &g.data_vertex[sourceVertex].data_edge[i];
192 if (getTarget(ed) == nilVal) continue;
193 appendNumber(target, (int)sourceVertex);
194 write(target, "->");
195 appendNumber(target, (int)getTarget(ed));
196 writeValue(target, ' ');
197 writeValue(target, ' ');
198 write(target, "Label: ");
199 writeValue(target, TAlphabet(i));
200 write(target, CharString(getCargo(ed)));
201 writeValue(target, '\n');
202 }
203 }
204 }
205
206 //////////////////////////////////////////////////////////////////////////////
207
208 /*!
209 * @fn WordGraph#getSuccessor
210 * @brief Gets the successor for a given vertex and an edge label.
211 *
212 * @signature TVertexDescriptor getSuccessor(a, v, str);
213 *
214 * @param[in] a The WordGraph to query for its successor.
215 * @param[in] v The descriptor fo the vertex to get the successor for.
216 * @param[in] str The label.
217 *
218 * @return TVertexDescriptor A vertex descriptor or nil if successor is not defined.
219 *
220 * @see WordGraph#parseString
221 * @see Graph#getNil
222 */
223
224 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
225 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const & chars)226 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
227 TVertexDescriptor vertex,
228 TCharacters const& chars)
229 {
230 SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
231 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
232 typedef typename EdgeType<TGraph>::Type TEdgeStump;
233 TEdgeStump* ed = findEdge(g, vertex, getValue(chars, 0));
234 if (getCargo(ed) == suffix(chars, 1)) {
235 return getTarget(ed);
236 } else {
237 return getNil<TVertexDescriptor>();
238 }
239 }
240
241 //////////////////////////////////////////////////////////////////////////////
242
243 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
244 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const * chars)245 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
246 TVertexDescriptor vertex,
247 TCharacters const* chars)
248 {
249 return getSuccessor(g,vertex,String<TAlphabet>(chars));
250 }
251
252 //////////////////////////////////////////////////////////////////////////////
253
254 /*!
255 * @fn WordGraph#parseString
256 * @brief Parses a string one character at a time and moves accordingly in the WordGraph.
257 *
258 * @signature TVertexDescriptor parseString(a, v, beginIt, endIt);
259 * @signature TVertexDescriptor parseString(a, v, str);
260 *
261 * @param[in] a An WordGraph.
262 * @param[in] v The descriptor of the vertex to start at.
263 * @param[in] str The @link ContainerConcept @endlink to parse.
264 * @param[in,out] beginIt Begin iterator to sequence to parse. Set to the first character that could not be parsed
265 * or to the value of endIt if all of the string was parsed.
266 * @param[in] endIt End iterator to sequence to parse.
267 *
268 * @return TVertexDescriptor The vertex descriptor of the state that was reached after parsing.
269 *
270 * The parsing stops before @link WordGraph#getSuccessor @endlink reaches the nil state or if the complete sequence is
271 * read.
272 */
273
274 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TIterator>
275 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
parseString(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor const vertex,TIterator beginIt,TIterator endIt)276 parseString(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
277 TVertexDescriptor const vertex,
278 TIterator beginIt,
279 TIterator endIt)
280 {
281 SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
282 typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
283 typedef typename Size<TGraph>::Type TSize;
284 TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
285 TVertexDescriptor succ = vertex;
286 while (beginIt!=endIt) {
287 String<TAlphabet> label(*beginIt);
288 TSize range = 1;
289 TVertexDescriptor tmp = getSuccessor(g,succ,label);
290 while ((tmp == nilVal) &&
291 (beginIt+range != endIt))
292 {
293 appendValue(label, *(beginIt + range));
294 tmp = getSuccessor(g,succ,label);
295 ++range;
296 }
297 if (tmp == nilVal) break;
298 succ = tmp;
299 beginIt = beginIt+range;
300 }
301 return succ;
302 }
303
304 /*!
305 * @fn WordGraph#canParseString
306 * @brief Test whether an WordGraph can parse a string completely.
307 *
308 * @signature bool canParseString(a[, v], str);
309 *
310 * @param[in] a The WordGraph to use for parsing.
311 * @param[in] v Optionally, the descriptor of the vertex to start at. Defaults to the root.
312 * @param[in] str The string to parse.
313 *
314 * @return bool <tt>true</tt> if the WordGraph parses <tt>str</tt> , starting at <tt>v</tt>, completely and
315 * <tt>false</tt> otherwise.
316 *
317 * @section Remarks
318 *
319 * This has not implemented yet.
320 */
321
322 // TODO(holtgrew): Not implemented yet!
323
324
325 }// namespace seqan
326
327 #endif //#ifndef SEQAN_HEADER_...
328