1 /*==========================================================================
2 RazerS - Fast Read Mapping with Controlled Loss Rate
3 http://www.seqan.de/projects/razers.html
4
5 ============================================================================
6 Copyright (C) 2010
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 3 of the License, or (at your options) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ==========================================================================*/
21
22 #ifndef SEQAN_HEADER_RAZERS_WINDOW_H
23 #define SEQAN_HEADER_RAZERS_WINDOW_H
24
25 #include <iostream>
26
27 namespace seqan {
28
29 //////////////////////////////////////////////////////////////////////////////
30 // Find read matches in a single genome sequence
31 //
32 // Creates finder on the contig given by the ID and a verifier.
33 // Searches through the contig using the findWindowNext() function.
34 // The results are dumped in the (aligned) store.
35 template <
36 typename TFragmentStore,
37 typename TReadIndex,
38 typename TSwiftSpec,
39 typename TPreprocessing,
40 typename TCounts,
41 typename TRazerSOptions,
42 typename TRazerSMode>
_mapSingleReadsToContigWindow(TFragmentStore & store,unsigned contigId,Pattern<TReadIndex,Swift<TSwiftSpec>> & swiftPattern,TPreprocessing & preprocessing,TCounts & cnts,char orientation,TRazerSOptions & options,TRazerSMode const & mode)43 void _mapSingleReadsToContigWindow(
44 TFragmentStore & store,
45 unsigned contigId, // ... and its sequence number
46 Pattern<TReadIndex, Swift<TSwiftSpec> > & swiftPattern,
47 TPreprocessing & preprocessing,
48 TCounts & cnts,
49 char orientation, // q-gram index of reads
50 TRazerSOptions & options,
51 TRazerSMode const & mode)
52 {
53 _mapSingleReadsToContigWindow(store, store, contigId, swiftPattern, swiftPattern, preprocessing, cnts, orientation, options, mode);
54 }
55
56 //////////////////////////////////////////////////////////////////////////////
57 // Find read matches in a single genome sequence
58 //
59 // Creates finder on the contig given by the ID and a verifier.
60 // Searches through the contig using the findWindowNext() function.
61 // The results are dumped in the (aligned) store.
62 // Specialized version: Contigs are taken from the main store but the results
63 // are written to the block store. Used in the parallel reads over whole
64 // genome version.
65 template <
66 typename TFragmentStore,
67 typename TReadIndex,
68 typename TSwiftSpec,
69 typename TSwiftPatternHandler,
70 typename TPreprocessing,
71 typename TCounts,
72 typename TRazerSOptions,
73 typename TRazerSMode>
_mapSingleReadsToContigWindow(TFragmentStore & mainStore,TFragmentStore & blockStore,unsigned contigId,Pattern<TReadIndex,Swift<TSwiftSpec>> & swiftPattern,TSwiftPatternHandler & swiftPatternHandler,TPreprocessing & preprocessing,TCounts & cnts,char orientation,TRazerSOptions & options,TRazerSMode const & mode)74 void _mapSingleReadsToContigWindow(
75 TFragmentStore & mainStore,
76 TFragmentStore & blockStore,
77 unsigned contigId, // ... and its sequence number
78 Pattern<TReadIndex, Swift<TSwiftSpec> > & swiftPattern,
79 TSwiftPatternHandler & swiftPatternHandler,
80 TPreprocessing & preprocessing,
81 TCounts & cnts,
82 char orientation, // q-gram index of reads
83 TRazerSOptions & options,
84 TRazerSMode const & mode)
85 {
86 // FILTRATION
87 typedef typename TFragmentStore::TContigSeq TContigSeq;
88 typedef Finder<TContigSeq, Swift<TSwiftSpec> > TSwiftFinder;
89 typedef Pattern<TReadIndex, Swift<TSwiftSpec> > TSwiftPattern;
90
91 // VERIFICATION
92 typedef MatchVerifier<
93 TFragmentStore,
94 TRazerSOptions,
95 TRazerSMode,
96 TPreprocessing,
97 TSwiftPattern,
98 TCounts> TVerifier;
99 typedef typename Fibre<TReadIndex, FibreText>::Type TReadSet;
100
101 // HITS
102 typedef typename TSwiftFinder::THitString THitString;
103 typedef typename Value<THitString>::Type TSwiftHit;
104 typedef typename Size<THitString>::Type THitStringSize;
105
106 typedef typename Size<typename TFragmentStore::TAlignedReadStore>::Type TAlignedReadStoreSize;
107
108 // output what is done if verbous
109 if (options._debugLevel >= 1)
110 {
111 std::cerr << std::endl << "Process genome seq #" << contigId;
112 if (orientation == 'F')
113 std::cerr << "[fwd]";
114 else
115 std::cerr << "[rev]";
116 }
117 // lock contig
118 lockContig(mainStore, contigId);
119 TContigSeq & contigSeq = mainStore.contigStore[contigId].seq;
120 if (orientation == 'R')
121 reverseComplement(contigSeq);
122
123 // Create finder and verifier
124 TSwiftFinder swiftFinder(contigSeq, options.repeatLength, 1);
125 TVerifier verifier(blockStore, options, preprocessing, swiftPattern, cnts);
126
127 // initialize verifier
128 verifier.onReverseComplement = (orientation == 'R');
129 verifier.genomeLength = length(contigSeq);
130 verifier.m.contigId = contigId;
131
132 // if the pattern can be initialized and there is a non-repeat region in the contig that fits a qgram.
133 if (windowFindBegin(swiftFinder, swiftPattern, options.errorRate))
134 {
135
136 _proFloat myTime = sysTime();
137
138 bool sequenceLeft = true;
139
140 // while there is more contig sequence to search through
141 while (sequenceLeft)
142 {
143 sequenceLeft = windowFindNext(swiftFinder, swiftPattern, 1000000);
144
145 printf("filter: %f sec\n", sysTime() - myTime);
146 myTime = sysTime();
147
148 // get the found hits from the finder
149 THitString hits = getSwiftHits(swiftFinder);
150 // verifiy them
151 for (THitStringSize h = 0; h < length(hits); ++h)
152 {
153 verifier.m.readId = hits[h].ndlSeqNo; //array oder jedesmal berechnen
154 matchVerify(verifier, swiftInfix(hits[h], contigSeq), hits[h].ndlSeqNo, host(host(swiftPattern))[hits[h].ndlSeqNo], mode);
155 ++options.countFiltration;
156 }
157
158 printf("verify: %f sec\n", sysTime() - myTime);
159 myTime = sysTime();
160
161 // compact matches if neccessary
162 if (length(blockStore.alignedReadStore) > options.compactThresh)
163 {
164 TAlignedReadStoreSize oldSize = length(blockStore.alignedReadStore);
165 if (IsSameType<typename TRazerSMode::TGapMode, RazerSGapped>::VALUE || options.threshold == 0)
166 maskDuplicates(blockStore, mode); // overlapping parallelograms cause duplicates
167
168 compactMatches(blockStore, cnts, options, mode, swiftPatternHandler, COMPACT);
169 if (options._debugLevel >= 2)
170 std::cerr << '(' << oldSize - length(blockStore.alignedReadStore) << " matches removed)";
171 }
172
173 }
174
175 // clear finders
176 windowFindEnd(swiftFinder, swiftPattern);
177
178 }
179
180 if (!unlockAndFreeContig(mainStore, contigId)) // if the contig is still used
181 if (orientation == 'R')
182 reverseComplement(contigSeq);
183 // we have to restore original orientation
184 }
185
186 }
187
188 #endif
189