1 /*==========================================================================
2  RazerS - Fast Read Mapping with Controlled Loss Rate
3  http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6  Copyright (C) 2010
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 3 of the License, or (at your options) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #ifndef SEQAN_HEADER_RAZERS_WINDOW_H
23 #define SEQAN_HEADER_RAZERS_WINDOW_H
24 
25 #include <iostream>
26 
27 namespace seqan {
28 
29 //////////////////////////////////////////////////////////////////////////////
30 // Find read matches in a single genome sequence
31 //
32 // Creates finder on the contig given by the ID and a verifier.
33 // Searches through the contig using the findWindowNext() function.
34 // The results are dumped in the (aligned) store.
35 template <
36     typename TFragmentStore,
37     typename TReadIndex,
38     typename TSwiftSpec,
39     typename TPreprocessing,
40     typename TCounts,
41     typename TRazerSOptions,
42     typename TRazerSMode>
_mapSingleReadsToContigWindow(TFragmentStore & store,unsigned contigId,Pattern<TReadIndex,Swift<TSwiftSpec>> & swiftPattern,TPreprocessing & preprocessing,TCounts & cnts,char orientation,TRazerSOptions & options,TRazerSMode const & mode)43 void _mapSingleReadsToContigWindow(
44     TFragmentStore & store,
45     unsigned                                  contigId,                     // ... and its sequence number
46     Pattern<TReadIndex, Swift<TSwiftSpec> > & swiftPattern,
47     TPreprocessing & preprocessing,
48     TCounts & cnts,
49     char                                      orientation,                      // q-gram index of reads
50     TRazerSOptions & options,
51     TRazerSMode                       const & mode)
52 {
53     _mapSingleReadsToContigWindow(store, store, contigId, swiftPattern, swiftPattern, preprocessing, cnts, orientation, options, mode);
54 }
55 
56 //////////////////////////////////////////////////////////////////////////////
57 // Find read matches in a single genome sequence
58 //
59 // Creates finder on the contig given by the ID and a verifier.
60 // Searches through the contig using the findWindowNext() function.
61 // The results are dumped in the (aligned) store.
62 // Specialized version: Contigs are taken from the main store but the results
63 //   are written to the block store. Used in the parallel reads over whole
64 //   genome version.
65 template <
66     typename TFragmentStore,
67     typename TReadIndex,
68     typename TSwiftSpec,
69     typename TSwiftPatternHandler,
70     typename TPreprocessing,
71     typename TCounts,
72     typename TRazerSOptions,
73     typename TRazerSMode>
_mapSingleReadsToContigWindow(TFragmentStore & mainStore,TFragmentStore & blockStore,unsigned contigId,Pattern<TReadIndex,Swift<TSwiftSpec>> & swiftPattern,TSwiftPatternHandler & swiftPatternHandler,TPreprocessing & preprocessing,TCounts & cnts,char orientation,TRazerSOptions & options,TRazerSMode const & mode)74 void _mapSingleReadsToContigWindow(
75     TFragmentStore & mainStore,
76     TFragmentStore & blockStore,
77     unsigned                                  contigId,                     // ... and its sequence number
78     Pattern<TReadIndex, Swift<TSwiftSpec> > & swiftPattern,
79     TSwiftPatternHandler & swiftPatternHandler,
80     TPreprocessing & preprocessing,
81     TCounts & cnts,
82     char                                      orientation,                      // q-gram index of reads
83     TRazerSOptions & options,
84     TRazerSMode                      const & mode)
85 {
86     // FILTRATION
87     typedef typename TFragmentStore::TContigSeq             TContigSeq;
88     typedef Finder<TContigSeq, Swift<TSwiftSpec> >          TSwiftFinder;
89     typedef Pattern<TReadIndex, Swift<TSwiftSpec> >         TSwiftPattern;
90 
91     // VERIFICATION
92     typedef MatchVerifier<
93         TFragmentStore,
94         TRazerSOptions,
95         TRazerSMode,
96         TPreprocessing,
97         TSwiftPattern,
98         TCounts>                                           TVerifier;
99     typedef typename Fibre<TReadIndex, FibreText>::Type TReadSet;
100 
101     // HITS
102     typedef typename TSwiftFinder::THitString               THitString;
103     typedef typename Value<THitString>::Type                TSwiftHit;
104     typedef typename Size<THitString>::Type                 THitStringSize;
105 
106     typedef typename Size<typename TFragmentStore::TAlignedReadStore>::Type TAlignedReadStoreSize;
107 
108     // output what is done if verbous
109     if (options._debugLevel >= 1)
110     {
111         std::cerr << std::endl << "Process genome seq #" << contigId;
112         if (orientation == 'F')
113             std::cerr << "[fwd]";
114         else
115             std::cerr << "[rev]";
116     }
117     // lock contig
118     lockContig(mainStore, contigId);
119     TContigSeq & contigSeq = mainStore.contigStore[contigId].seq;
120     if (orientation == 'R')
121         reverseComplement(contigSeq);
122 
123     // Create finder and verifier
124     TSwiftFinder    swiftFinder(contigSeq, options.repeatLength, 1);
125     TVerifier       verifier(blockStore, options, preprocessing, swiftPattern, cnts);
126 
127     // initialize verifier
128     verifier.onReverseComplement = (orientation == 'R');
129     verifier.genomeLength = length(contigSeq);
130     verifier.m.contigId = contigId;
131 
132     // if the pattern can be initialized and there is a non-repeat region in the contig that fits a qgram.
133     if (windowFindBegin(swiftFinder, swiftPattern, options.errorRate))
134     {
135 
136         _proFloat myTime = sysTime();
137 
138         bool sequenceLeft = true;
139 
140         // while there is more contig sequence to search through
141         while (sequenceLeft)
142         {
143             sequenceLeft = windowFindNext(swiftFinder, swiftPattern, 1000000);
144 
145             printf("filter: %f sec\n", sysTime() - myTime);
146             myTime = sysTime();
147 
148             // get the found hits from the finder
149             THitString hits = getSwiftHits(swiftFinder);
150             // verifiy them
151             for (THitStringSize h = 0; h < length(hits); ++h)
152             {
153                 verifier.m.readId = hits[h].ndlSeqNo;             //array oder jedesmal berechnen
154                 matchVerify(verifier, swiftInfix(hits[h], contigSeq), hits[h].ndlSeqNo, host(host(swiftPattern))[hits[h].ndlSeqNo], mode);
155                 ++options.countFiltration;
156             }
157 
158             printf("verify: %f sec\n", sysTime() - myTime);
159             myTime = sysTime();
160 
161             // compact matches if neccessary
162             if (length(blockStore.alignedReadStore) > options.compactThresh)
163             {
164                 TAlignedReadStoreSize oldSize = length(blockStore.alignedReadStore);
165                 if (IsSameType<typename TRazerSMode::TGapMode, RazerSGapped>::VALUE || options.threshold == 0)
166                     maskDuplicates(blockStore, mode);       // overlapping parallelograms cause duplicates
167 
168                 compactMatches(blockStore, cnts, options, mode, swiftPatternHandler, COMPACT);
169                 if (options._debugLevel >= 2)
170                     std::cerr << '(' << oldSize - length(blockStore.alignedReadStore) << " matches removed)";
171             }
172 
173         }
174 
175         // clear finders
176         windowFindEnd(swiftFinder, swiftPattern);
177 
178     }
179 
180     if (!unlockAndFreeContig(mainStore, contigId))                              // if the contig is still used
181         if (orientation == 'R')
182             reverseComplement(contigSeq);
183     // we have to restore original orientation
184 }
185 
186 }
187 
188 #endif
189