1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
34 #define SEQAN_HEADER_GRAPH_IMPL_WORDGRAPH_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 template <typename TSpec = Default>
40 struct WordGraph;
41 
42 //////////////////////////////////////////////////////////////////////////////
43 // Graph - WordGraph
44 //////////////////////////////////////////////////////////////////////////////
45 
46 //////////////////////////////////////////////////////////////////////////////
47 
48 /*!
49  * @class WordGraph
50  * @extends Automaton
51  * @headerfile <seqan/graph_type.h>
52  * @brief A special automaton that stores words instead of single characters along its edges.
53  *
54  * @signature template <[typename TAlphabet[, typename Spec]]>
55  *            class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> >;
56  *
57  * @tparam TAlphabet The alphabet type.
58  * @tparam TSpec     The specializing types.
59  */
60 
61 template<typename TAlphabet, typename TSpec>
62 class Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >
63 {
64     public:
65         typedef typename VertexIdHandler<Graph>::Type TVertexIdManager_;
66         typedef typename EdgeIdHandler<Graph>::Type TEdgeIdManager_;
67         typedef typename VertexDescriptor<Graph>::Type TVertexDescriptor_;
68         typedef typename EdgeType<Graph>::Type TEdge_;
69 
70         String<AutomatonEdgeArray<TEdge_, TAlphabet> > data_vertex;        // List of tables
71         TVertexIdManager_ data_id_managerV;
72         TEdgeIdManager_ data_id_managerE;
73         TVertexDescriptor_ data_root;
74 
75 
76 //____________________________________________________________________________
77 
78 
Graph()79         Graph() : data_root(0) {
80             SEQAN_CHECKPOINT
81         }
82 
83 
~Graph()84         ~Graph() {
85             SEQAN_CHECKPOINT
86             clear(*this);
87         }
88 
Graph(Graph const & _other)89         Graph(Graph const & _other)
90         {
91             SEQAN_CHECKPOINT
92             _copyGraph(_other, *this);
93         }
94 
95         Graph const& operator = (Graph const & _other) {
96             SEQAN_CHECKPOINT
97             if (this == &_other) return *this;
98             _copyGraph(_other, *this);
99             return *this;
100         }
101 };
102 
103 
104 //////////////////////////////////////////////////////////////////////////////
105 
106 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
107 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)108 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
109         TVertexDescriptor const source,
110         TVertexDescriptor const target,
111         String<TAlphabet> const & label)
112 {
113     SEQAN_CHECKPOINT;
114     SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
115     SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
116 
117     typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
118     typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
119     typedef typename Id<TGraph>::Type TId;
120 
121     TAlphabet firstChar = getValue(label, 0);
122     TEdgeDescriptor e = findEdge(g, source, firstChar);
123     TId id = obtainId(g.data_id_managerE);
124     _assignId(e, id);
125     assignTarget(e, target);
126     String<TAlphabet> suf(suffix(label,1));
127     assignCargo(e, suf);
128     return e;
129 }
130 
131 //////////////////////////////////////////////////////////////////////////////
132 
133 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TChars>
134 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,TChars const * chars)135 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
136         TVertexDescriptor const source,
137         TVertexDescriptor const target,
138         TChars const* chars)
139 {
140     SEQAN_CHECKPOINT
141     return addEdge(g,source,target,String<TAlphabet>(chars));
142 }
143 
144 //////////////////////////////////////////////////////////////////////////////
145 
146 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TLabel, typename TEdgeCargo>
147 inline typename EdgeDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
addEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> &,TVertexDescriptor const,TVertexDescriptor const,TLabel const,TEdgeCargo const)148 addEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& /*g*/,
149         TVertexDescriptor const /*source*/,
150         TVertexDescriptor const /*target*/,
151         TLabel const /*label*/,
152         TEdgeCargo const /*cargo*/)
153 {
154     // No additional cargo allowed. Cargo is used for the words in the graph.
155     // Use external property map.
156     SEQAN_ASSERT_FAIL("No additional cargo allowed. Cargo is used for the words in the graph. Use external property map.");
157 }
158 
159 //////////////////////////////////////////////////////////////////////////////
160 
161 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor>
162 inline void
removeEdge(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> & g,TVertexDescriptor const source,TVertexDescriptor const target,String<TAlphabet> const & label)163 removeEdge(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > >& g,
164         TVertexDescriptor const source,
165         TVertexDescriptor const target,
166         String<TAlphabet> const& label)
167 {
168     SEQAN_CHECKPOINT;
169     (void)target;  // In case it is compiled without assertions.
170     SEQAN_ASSERT(idInUse(g.data_id_managerV, source));
171     SEQAN_ASSERT(idInUse(g.data_id_managerV, target));
172 
173     TAlphabet firstChar = getValue(label, 0);
174     removeEdge(g, findEdge(g,source, firstChar));
175 }
176 
177 //////////////////////////////////////////////////////////////////////////////
178 
179 template<typename TFile, typename TAlphabet, typename TCargo, typename TSpec>
180 inline void
write(TFile & target,Graph<Automaton<TAlphabet,TCargo,WordGraph<TSpec>>> const & g)181 write(TFile & target,
182       Graph<Automaton<TAlphabet, TCargo, WordGraph<TSpec> > > const& g)
183 {
184 //IOREV _nodoc_
185     typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
186     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
187     typedef typename EdgeType<TGraph>::Type TEdge;
188     typedef typename Size<TAlphabet>::Type TSize;
189     TSize table_length = ValueSize<TAlphabet>::VALUE;
190     TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
191 
192     write(target, "WordGraph - Directed:\n");
193     typedef typename Iterator<String<AutomatonEdgeArray<TEdge, TAlphabet> > const, Rooted>::Type TIterConst;
194     for(TIterConst it = begin(g.data_vertex);!atEnd(it);goNext(it)) {
195         if (!idInUse(g.data_id_managerV, position(it))) continue;
196         TVertexDescriptor sourceVertex = position(it);
197         for(TSize i=0;i<table_length;++i) {
198             TEdge const* ed = &g.data_vertex[sourceVertex].data_edge[i];
199             if (getTarget(ed) ==  nilVal) continue;
200             appendNumber(target, (int)sourceVertex);
201             write(target, "->");
202             appendNumber(target, (int)getTarget(ed));
203             writeValue(target, ' ');
204             writeValue(target, ' ');
205             write(target, "Label: ");
206             writeValue(target, TAlphabet(i));
207             write(target, CharString(getCargo(ed)));
208             writeValue(target, '\n');
209         }
210     }
211 }
212 
213 //////////////////////////////////////////////////////////////////////////////
214 
215 /*!
216  * @fn WordGraph#getSuccessor
217  * @brief Gets the successor for a given vertex and an edge label.
218  *
219  * @signature TVertexDescriptor getSuccessor(a, v, str);
220  *
221  * @param[in] a   The WordGraph to query for its successor.
222  * @param[in] v   The descriptor fo the vertex to get the successor for.
223  * @param[in] str The label.
224  *
225  * @return TVertexDescriptor A vertex descriptor or nil if successor is not defined.
226  *
227  * @see WordGraph#parseString
228  * @see Graph#getNil
229  */
230 
231 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
232 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const & chars)233 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
234              TVertexDescriptor vertex,
235              TCharacters const& chars)
236 {
237     SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
238     typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
239     typedef typename EdgeType<TGraph>::Type TEdgeStump;
240     TEdgeStump* ed = findEdge(g, vertex, getValue(chars, 0));
241     if (getCargo(ed) == suffix(chars, 1)) {
242         return getTarget(ed);
243     } else {
244         return getNil<TVertexDescriptor>();
245     }
246 }
247 
248 //////////////////////////////////////////////////////////////////////////////
249 
250 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TCharacters>
251 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
getSuccessor(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor vertex,TCharacters const * chars)252 getSuccessor(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
253              TVertexDescriptor vertex,
254              TCharacters const* chars)
255 {
256     SEQAN_CHECKPOINT
257     return getSuccessor(g,vertex,String<TAlphabet>(chars));
258 }
259 
260 //////////////////////////////////////////////////////////////////////////////
261 
262 /*!
263  * @fn WordGraph#parseString
264  * @brief Parses a string one character at a time and moves accordingly in the WordGraph.
265  *
266  * @signature TVertexDescriptor parseString(a, v, beginIt, endIt);
267  * @signature TVertexDescriptor parseString(a, v, str);
268  *
269  * @param[in]     a       An WordGraph.
270  * @param[in]     v       The descriptor of the vertex to start at.
271  * @param[in]     str     The @link ContainerConcept @endlink to parse.
272  * @param[in,out] beginIt Begin iterator to sequence to parse.  Set to the first character that could not be parsed
273  *                        or to the value of endIt if all of the string was parsed.
274  * @param[in]     endIt   End iterator to sequence to parse.
275  *
276  * @return TVertexDescriptor The vertex descriptor of the state that was reached after parsing.
277  *
278  * The parsing stops before @link WordGraph#getSuccessor @endlink reaches the nil state or if the complete sequence is
279  * read.
280  */
281 
282 template<typename TAlphabet, typename TSpec, typename TVertexDescriptor, typename TIterator>
283 inline typename VertexDescriptor<Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > >::Type
parseString(Graph<Automaton<TAlphabet,String<TAlphabet>,WordGraph<TSpec>>> const & g,TVertexDescriptor const vertex,TIterator beginIt,TIterator endIt)284 parseString(Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > const& g,
285             TVertexDescriptor const vertex,
286             TIterator beginIt,
287             TIterator endIt)
288 {
289     SEQAN_CHECKPOINT;
290     SEQAN_ASSERT(idInUse(g.data_id_managerV, vertex));
291     typedef Graph<Automaton<TAlphabet, String<TAlphabet>, WordGraph<TSpec> > > TGraph;
292     typedef typename Size<TGraph>::Type TSize;
293     TVertexDescriptor nilVal = getNil<TVertexDescriptor>();
294     TVertexDescriptor succ = vertex;
295     while (beginIt!=endIt) {
296         String<TAlphabet> label(*beginIt);
297         TSize range = 1;
298         TVertexDescriptor tmp = getSuccessor(g,succ,label);
299         while ((tmp == nilVal) &&
300                 (beginIt+range != endIt))
301         {
302             appendValue(label, *(beginIt + range));
303             tmp = getSuccessor(g,succ,label);
304             ++range;
305         }
306         if (tmp == nilVal) break;
307         succ = tmp;
308         beginIt = beginIt+range;
309     }
310     return succ;
311 }
312 
313 /*!
314  * @fn WordGraph#canParseString
315  * @brief Test whether an WordGraph can parse a string completely.
316  *
317  * @signature bool canParseString(a[, v], str);
318  *
319  * @param[in] a   The WordGraph to use for parsing.
320  * @param[in] v   Optionally, the descriptor of the vertex to start at.  Defaults to the root.
321  * @param[in] str The string to parse.
322  *
323  * @return bool <tt>true</tt> if the WordGraph parses <tt>str</tt> , starting at <tt>v</tt>, completely and
324  *              <tt>false</tt> otherwise.
325  *
326  * @section Remarks
327  *
328  * This has not implemented yet.
329  */
330 
331 // TODO(holtgrew): Not implemented yet!
332 
333 
334 }// namespace SEQAN_NAMESPACE_MAIN
335 
336 #endif //#ifndef SEQAN_HEADER_...
337