1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: David Weese <david.weese@fu-berlin.de> 33 // ========================================================================== 34 35 #ifndef SEQAN_HEADER_INDEX_BWT_H 36 #define SEQAN_HEADER_INDEX_BWT_H 37 38 namespace seqan 39 { 40 41 //namespace SEQAN_NAMESPACE_PIPELINING 42 //{ 43 44 struct Bwt {}; 45 46 47 ////////////////////////////////////////////////////////////////////////////// 48 // external Bwt algorithm 49 ////////////////////////////////////////////////////////////////////////////// 50 51 52 template < typename TTextInput, typename TSuffixArrayInput > 53 struct Value< Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Bwt > > { 54 typedef typename Value<TTextInput>::Type Type; 55 }; 56 57 ////////////////////////////////////////////////////////////////////////////// 58 // bwt class 59 template < typename TTextInput, typename TSuffixArrayInput > 60 struct Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Bwt > 61 { 62 // *** SPECIALIZATION *** 63 64 typedef Pipe< TSuffixArrayInput, Counter > TSA; 65 typedef typename Size<TTextInput>::Type TSize; 66 typedef Pool< TypeOf_(TSA), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TSA)>, TSize> > > TInverter; 67 typedef Pipe< TInverter, Filter< filterI2<TypeOf_(TInverter)> > > TCounterFilter; 68 typedef Pipe< TTextInput, Shifter< -1, false > > TShiftText; 69 70 typedef Pipe< Bundle2< TCounterFilter, TShiftText >, Joiner > TJoiner; 71 typedef Pool< TypeOf_(TJoiner), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TJoiner)>, TSize> > > TLinearMapper; 72 typedef Pipe< TLinearMapper, Filter< filterI2<TypeOf_(TLinearMapper)> > > TFilter; 73 74 TLinearMapper mapper; 75 TFilter in; 76 77 Pipe(): 78 in(mapper) {} 79 80 Pipe(Bundle2< TTextInput, TSuffixArrayInput > const &_bundleIn): 81 in(mapper) 82 { 83 process(_bundleIn.in1, _bundleIn.in2); 84 } 85 86 template < typename TTextInput_, typename TSuffixArrayInput_ > 87 bool process(TTextInput_ &textIn, TSuffixArrayInput_ &suffixArrayIn) { 88 89 // *** INSTANTIATION *** 90 91 TSA sa(suffixArrayIn); 92 TInverter inverter; 93 TCounterFilter filter(inverter); 94 95 #ifdef SEQAN_DEBUG_INDEX 96 std::cerr << " invert suffix array" << std::endl; 97 #endif 98 inverter << sa; 99 SEQAN_PROMARK("Suffix-Array invertiert"); 100 101 TShiftText shifter(textIn); 102 TJoiner joiner(bundle2(filter, shifter)); 103 104 #ifdef SEQAN_DEBUG_INDEX 105 std::cerr << " de-invert suffix array" << std::endl; 106 #endif 107 mapper << joiner; 108 SEQAN_PROMARK("Suffix-Array linearisiert"); 109 110 return true; 111 } 112 113 inline typename Value<Pipe>::Type const operator*() const { 114 return *in; 115 } 116 117 inline Pipe& operator++() { 118 ++in; 119 return *this; 120 } 121 }; 122 123 // not sure which interface is more intuitive, we support both 124 // you can call "skew << pipe" or "skew_t skew(pipe); skew.process()" 125 // for the first we would need no _in member 126 template < typename TInput, typename TTextInput_, typename TSuffixArrayInput_ > 127 inline bool operator<<(Pipe< TInput, Bwt > &me, Bundle2< TTextInput_, TSuffixArrayInput_ > const &bundleIn) { 128 return me.process(bundleIn.in1, bundleIn.in2); 129 } 130 131 132 133 ////////////////////////////////////////////////////////////////////////////// 134 // external Bwt algorithm (optimized for multiple sequences) 135 ////////////////////////////////////////////////////////////////////////////// 136 137 138 template < typename TTextInput, typename TSuffixArrayInput, typename TPair, typename TLimitsString > 139 struct Value< Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Multi<Bwt, TPair, TLimitsString> > > { 140 typedef typename Value<TTextInput>::Type Type; 141 }; 142 143 template <typename InType, typename TLimitsString, typename Result = typename Value<TLimitsString>::Type> 144 struct _filterGlobalizer : public std::unary_function<InType,Result> { 145 TLimitsString const &limits; 146 _filterGlobalizer(TLimitsString const &_limits) : limits(_limits) {} 147 inline Result operator()(const InType& x) const 148 { 149 return posGlobalize(x, limits); 150 } 151 }; 152 153 154 ////////////////////////////////////////////////////////////////////////////// 155 // bwt class 156 template < typename TTextInput, typename TSuffixArrayInput, typename TPair, typename TLimitsString > 157 struct Pipe< Bundle2< TTextInput, TSuffixArrayInput >, Multi<Bwt, TPair, TLimitsString> > 158 { 159 // *** SPECIALIZATION *** 160 161 typedef _filterGlobalizer<TypeOf_(TSuffixArrayInput), TLimitsString, TSizeOf_(TSuffixArrayInput)> filter_globalizer_t; 162 typedef Pipe< TSuffixArrayInput, Filter<filter_globalizer_t> > TGlobalizer; 163 typedef Pipe< TGlobalizer, Counter > TSA; 164 typedef typename Size<TTextInput>::Type TSize; 165 typedef Pool< TypeOf_(TSA), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TSA)>, TSize> > > TInverter; 166 typedef Pipe< TInverter, Filter< filterI2<TypeOf_(TInverter)> > > TCounterFilter; 167 typedef Pipe< TTextInput, Shifter< -1, false > > TShiftText; 168 169 typedef Pipe< Bundle2< TCounterFilter, TShiftText >, Joiner > TJoiner; 170 typedef Pool< TypeOf_(TJoiner), MapperSpec< MapperConfigSize< filterI1<TypeOf_(TJoiner)>, TSize> > > TLinearMapper; 171 typedef Pipe< TLinearMapper, Filter< filterI2<TypeOf_(TLinearMapper)> > > TFilter; 172 173 TTextInput *textIn; 174 TSuffixArrayInput *suffixArrayIn; 175 TLinearMapper mapper; 176 TFilter in; 177 178 TLimitsString const &limits; 179 180 Pipe(TLimitsString const &_limits): 181 in(mapper), 182 limits(_limits) {} 183 184 Pipe(Bundle2< TTextInput, TSuffixArrayInput > const &_bundleIn, TLimitsString const &_limits): 185 textIn(&_bundleIn.in1), 186 suffixArrayIn(&_bundleIn.in2), 187 in(mapper), 188 limits(_limits) 189 { 190 process(); 191 } 192 193 inline void process() { 194 process(*textIn, *suffixArrayIn); 195 } 196 197 template < typename TTextInput_, typename TSuffixArrayInput_ > 198 bool process(TTextInput_ &textIn, TSuffixArrayInput_ &suffixArrayIn) { 199 200 // *** INSTANTIATION *** 201 202 for(int i=0;i<length(limits);++i) 203 std::cout << limits[i]<<" "; 204 std::cout<<std::endl; 205 206 TGlobalizer globalizer(suffixArrayIn, limits); 207 TSA sa(globalizer); 208 TInverter inverter; 209 TCounterFilter filter(inverter); 210 211 #ifdef SEQAN_DEBUG_INDEX 212 std::cerr << " invert suffix array" << std::endl; 213 #endif 214 inverter << sa; 215 SEQAN_PROMARK("Suffix-Array invertiert"); 216 217 TShiftText shifter(textIn); 218 TJoiner joiner(bundle2(filter, shifter)); 219 220 #ifdef SEQAN_DEBUG_INDEX 221 std::cerr << " de-invert suffix array" << std::endl; 222 #endif 223 mapper << joiner; 224 SEQAN_PROMARK("Suffix-Array linearisiert"); 225 226 return true; 227 } 228 229 inline typename Value<Pipe>::Type const operator*() const { 230 return *in; 231 } 232 233 inline Pipe& operator++() { 234 ++in; 235 return *this; 236 } 237 }; 238 239 // not sure which interface is more intuitive, we support both 240 // you can call "bwt << pipe" or "bwt_t bwt(pipe); bwt.process()" 241 // for the first we would need no _in member 242 template < typename TInput, typename TTextInput_, typename TSuffixArrayInput_, typename TPair, typename TLimitsString > 243 inline bool operator<<(Pipe< TInput, Multi<Bwt, TPair, TLimitsString> > &me, Bundle2< TTextInput_, TSuffixArrayInput_ > const &bundleIn) { 244 return me.process(bundleIn.in1, bundleIn.in2); 245 } 246 247 248 249 ////////////////////////////////////////////////////////////////////////////// 250 // internal Bwt algorithm 251 ////////////////////////////////////////////////////////////////////////////// 252 253 254 template < typename TBWT, 255 typename TText, 256 typename TSA > 257 void createBWTableInt( 258 TBWT &bwt, 259 TText const &s, 260 TSA const &SA) 261 { 262 typedef typename Value<TBWT>::Type TValue; 263 typedef typename GetValue<TSA>::Type TSAValue; 264 typedef typename Size<TSA>::Type TSize; 265 266 TSize n = length(s); 267 268 for (TSize i = 0; i < n; ++i) 269 { 270 TSAValue sa = getValue(SA, i); 271 if (sa != 0) 272 bwt[i] = getValue(s, sa - 1); 273 else 274 bwt[i] = TValue(); 275 // bwt[i] = getValue(s, n - 1); 276 } 277 } 278 279 template < typename TBWT, 280 typename TString, 281 typename TSpec, 282 typename TSA > 283 void createBWTableInt( 284 TBWT &bwt, 285 StringSet<TString, TSpec> const &s, 286 TSA const &SA) 287 { 288 typedef typename Value<TBWT>::Type TValue; 289 typedef typename Size<TSA>::Type TSize; 290 291 TSize n = lengthSum(s); 292 Pair<unsigned, typename Size<TString>::Type> loc; 293 294 for (TSize i = 0; i < n; ++i) 295 { 296 posLocalize(loc, getValue(SA, i), stringSetLimits(s)); 297 if (loc.i2 != 0) 298 bwt[i] = s[loc.i1][loc.i2 - 1]; 299 else 300 if (loc.i1 != 0) 301 bwt[i] = s[loc.i1 - 1][length(s[loc.i1 - 1]) - 1]; 302 else 303 bwt[i] = TValue(); 304 // bwt[i] = getValue(s, n - 1); 305 } 306 } 307 308 //} 309 310 } 311 312 #endif 313