1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_INDEX_BWT_H
36 #define SEQAN_HEADER_INDEX_BWT_H
37 
38 namespace seqan
39 {
40 
41 //namespace SEQAN_NAMESPACE_PIPELINING
42 //{
43 
44     struct Bwt {};
45 
46 
47     //////////////////////////////////////////////////////////////////////////////
48     // external Bwt algorithm
49     //////////////////////////////////////////////////////////////////////////////
50 
51 
52     template < typename TTextInput, typename TSuffixArrayInput >
53     struct Value< Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Bwt > > {
54         typedef typename Value<TTextInput>::Type Type;
55     };
56 
57     //////////////////////////////////////////////////////////////////////////////
58     // bwt class
59     template < typename TTextInput, typename TSuffixArrayInput >
60     struct Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Bwt >
61     {
62         // *** SPECIALIZATION ***
63 
64         typedef Pipe< TSuffixArrayInput, Counter > TSA;
65                                         typedef typename Size<TTextInput>::Type    TSize;
66         typedef Pool< TypeOf_(TSA), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TSA)>, TSize> > > TInverter;
67         typedef Pipe< TInverter, Filter< filterI2<TypeOf_(TInverter)> > > TCounterFilter;
68         typedef Pipe< TTextInput, Shifter< -1, false > > TShiftText;
69 
70         typedef Pipe< Bundle2< TCounterFilter, TShiftText >, Joiner > TJoiner;
71         typedef Pool< TypeOf_(TJoiner), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TJoiner)>, TSize> > > TLinearMapper;
72         typedef Pipe< TLinearMapper, Filter< filterI2<TypeOf_(TLinearMapper)> > > TFilter;
73 
74         TLinearMapper        mapper;
75         TFilter                in;
76 
77         Pipe():
78             in(mapper) {}
79 
80         Pipe(Bundle2< TTextInput, TSuffixArrayInput > const &_bundleIn):
81             in(mapper)
82         {
83             process(_bundleIn.in1, _bundleIn.in2);
84         }
85 
86         template < typename TTextInput_, typename TSuffixArrayInput_ >
87         bool process(TTextInput_ &textIn, TSuffixArrayInput_ &suffixArrayIn) {
88 
89             // *** INSTANTIATION ***
90 
91             TSA                            sa(suffixArrayIn);
92             TInverter                    inverter;
93             TCounterFilter                filter(inverter);
94 
95             #ifdef SEQAN_DEBUG_INDEX
96                 std::cerr << "  invert suffix array" << std::endl;
97             #endif
98             inverter << sa;
99             SEQAN_PROMARK("Suffix-Array invertiert");
100 
101             TShiftText                    shifter(textIn);
102             TJoiner                        joiner(bundle2(filter, shifter));
103 
104             #ifdef SEQAN_DEBUG_INDEX
105                 std::cerr << "  de-invert suffix array" << std::endl;
106             #endif
107             mapper << joiner;
108             SEQAN_PROMARK("Suffix-Array linearisiert");
109 
110             return true;
111         }
112 
113         inline typename Value<Pipe>::Type const operator*() const {
114             return *in;
115         }
116 
117         inline Pipe& operator++() {
118             ++in;
119             return *this;
120         }
121     };
122 
123     // not sure which interface is more intuitive, we support both
124     // you can call "skew << pipe" or "skew_t skew(pipe); skew.process()"
125     // for the first we would need no _in member
126     template < typename TInput, typename TTextInput_, typename TSuffixArrayInput_ >
127     inline bool operator<<(Pipe< TInput, Bwt > &me, Bundle2< TTextInput_, TSuffixArrayInput_ > const &bundleIn) {
128          return me.process(bundleIn.in1, bundleIn.in2);
129     }
130 
131 
132 
133     //////////////////////////////////////////////////////////////////////////////
134     // external Bwt algorithm (optimized for multiple sequences)
135     //////////////////////////////////////////////////////////////////////////////
136 
137 
138     template < typename TTextInput, typename TSuffixArrayInput, typename TPair, typename TLimitsString >
139     struct Value< Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Multi<Bwt, TPair, TLimitsString> > > {
140         typedef typename Value<TTextInput>::Type Type;
141     };
142 
143     template <typename InType, typename TLimitsString, typename Result = typename Value<TLimitsString>::Type>
144     struct _filterGlobalizer : public std::unary_function<InType,Result> {
145         TLimitsString const &limits;
146         _filterGlobalizer(TLimitsString const &_limits) : limits(_limits) {}
147         inline Result operator()(const InType& x) const
148         {
149             return posGlobalize(x, limits);
150         }
151     };
152 
153 
154     //////////////////////////////////////////////////////////////////////////////
155     // bwt class
156     template < typename TTextInput, typename TSuffixArrayInput, typename TPair, typename TLimitsString >
157     struct Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Multi<Bwt, TPair, TLimitsString> >
158     {
159         // *** SPECIALIZATION ***
160 
161                                         typedef _filterGlobalizer<TypeOf_(TSuffixArrayInput), TLimitsString, TSizeOf_(TSuffixArrayInput)> filter_globalizer_t;
162         typedef Pipe< TSuffixArrayInput, Filter<filter_globalizer_t> > TGlobalizer;
163         typedef Pipe< TGlobalizer, Counter > TSA;
164                                         typedef typename Size<TTextInput>::Type    TSize;
165         typedef Pool< TypeOf_(TSA), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TSA)>, TSize> > > TInverter;
166         typedef Pipe< TInverter, Filter< filterI2<TypeOf_(TInverter)> > > TCounterFilter;
167         typedef Pipe< TTextInput, Shifter< -1, false > > TShiftText;
168 
169         typedef Pipe< Bundle2< TCounterFilter, TShiftText >, Joiner > TJoiner;
170         typedef Pool< TypeOf_(TJoiner), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TJoiner)>, TSize> > > TLinearMapper;
171         typedef Pipe< TLinearMapper, Filter< filterI2<TypeOf_(TLinearMapper)> > > TFilter;
172 
173         TTextInput            *textIn;
174         TSuffixArrayInput    *suffixArrayIn;
175         TLinearMapper        mapper;
176         TFilter                in;
177 
178         TLimitsString const    &limits;
179 
180         Pipe(TLimitsString const &_limits):
181             in(mapper),
182             limits(_limits)    {}
183 
184         Pipe(Bundle2< TTextInput, TSuffixArrayInput > const &_bundleIn, TLimitsString const &_limits):
185             textIn(&_bundleIn.in1),
186             suffixArrayIn(&_bundleIn.in2),
187             in(mapper),
188             limits(_limits)
189         {
190             process();
191         }
192 
193         inline void process() {
194             process(*textIn, *suffixArrayIn);
195         }
196 
197         template < typename TTextInput_, typename TSuffixArrayInput_ >
198         bool process(TTextInput_ &textIn, TSuffixArrayInput_ &suffixArrayIn) {
199 
200             // *** INSTANTIATION ***
201 
202             for(int i=0;i<length(limits);++i)
203                 std::cout << limits[i]<<"  ";
204             std::cout<<std::endl;
205 
206             TGlobalizer                    globalizer(suffixArrayIn, limits);
207             TSA                            sa(globalizer);
208             TInverter                    inverter;
209             TCounterFilter                filter(inverter);
210 
211             #ifdef SEQAN_DEBUG_INDEX
212                 std::cerr << "  invert suffix array" << std::endl;
213             #endif
214             inverter << sa;
215             SEQAN_PROMARK("Suffix-Array invertiert");
216 
217             TShiftText                    shifter(textIn);
218             TJoiner                        joiner(bundle2(filter, shifter));
219 
220             #ifdef SEQAN_DEBUG_INDEX
221                 std::cerr << "  de-invert suffix array" << std::endl;
222             #endif
223             mapper << joiner;
224             SEQAN_PROMARK("Suffix-Array linearisiert");
225 
226             return true;
227         }
228 
229         inline typename Value<Pipe>::Type const operator*() const {
230             return *in;
231         }
232 
233         inline Pipe& operator++() {
234             ++in;
235             return *this;
236         }
237     };
238 
239     // not sure which interface is more intuitive, we support both
240     // you can call "bwt << pipe" or "bwt_t bwt(pipe); bwt.process()"
241     // for the first we would need no _in member
242     template < typename TInput, typename TTextInput_, typename TSuffixArrayInput_, typename TPair, typename TLimitsString >
243     inline bool operator<<(Pipe< TInput, Multi<Bwt, TPair, TLimitsString> > &me, Bundle2< TTextInput_, TSuffixArrayInput_ > const &bundleIn) {
244          return me.process(bundleIn.in1, bundleIn.in2);
245     }
246 
247 
248 
249     //////////////////////////////////////////////////////////////////////////////
250     // internal Bwt algorithm
251     //////////////////////////////////////////////////////////////////////////////
252 
253 
254     template < typename TBWT,
255                typename TText,
256                typename TSA >
257     void createBWTableInt(
258         TBWT &bwt,
259         TText const &s,
260         TSA const &SA)
261     {
262         typedef typename Value<TBWT>::Type        TValue;
263         typedef typename GetValue<TSA>::Type    TSAValue;
264         typedef typename Size<TSA>::Type        TSize;
265 
266         TSize n = length(s);
267 
268         for (TSize i = 0; i < n; ++i)
269         {
270             TSAValue sa = getValue(SA, i);
271             if (sa != 0)
272                 bwt[i] = getValue(s, sa - 1);
273             else
274                 bwt[i] = TValue();
275 //                bwt[i] = getValue(s, n - 1);
276         }
277     }
278 
279     template < typename TBWT,
280                typename TString,
281                typename TSpec,
282                typename TSA >
283     void createBWTableInt(
284         TBWT &bwt,
285         StringSet<TString, TSpec> const &s,
286         TSA const &SA)
287     {
288         typedef typename Value<TBWT>::Type        TValue;
289         typedef typename Size<TSA>::Type        TSize;
290 
291         TSize n = lengthSum(s);
292         Pair<unsigned, typename Size<TString>::Type> loc;
293 
294         for (TSize i = 0; i < n; ++i)
295         {
296             posLocalize(loc, getValue(SA, i), stringSetLimits(s));
297             if (loc.i2 != 0)
298                 bwt[i] = s[loc.i1][loc.i2 - 1];
299             else
300                 if (loc.i1 != 0)
301                     bwt[i] = s[loc.i1 - 1][length(s[loc.i1 - 1]) - 1];
302                 else
303                     bwt[i] = TValue();
304 //                    bwt[i] = getValue(s, n - 1);
305         }
306     }
307 
308 //}
309 
310 }
311 
312 #endif
313