1 #ifndef RAZERS_PAIRED_MATCH_FILTER_H_
2 #define RAZERS_PAIRED_MATCH_FILTER_H_
3
4 #include <seqan/graph_types/graph_idmanager.h>
5
6 #include "razers.h"
7
8 namespace seqan {
9
10 template <typename TThreadLocalStorage>
11 class FilterPatternLSetMaxErrorsWrapper;
12 template <typename TThreadLocalStorage>
13 class FilterPatternRSetMaxErrorsWrapper;
14
15 // ============================================================================
16 // Forwards
17 // ============================================================================
18
19 // ============================================================================
20 // Tags, Classes, Enums
21 // ============================================================================
22
23 template <typename TOptionsSpec, typename TReadSeqSet, typename TCallback>
24 class PairedMatchFilter
25 {
26 public:
27 // Number of reads.
28 unsigned readCount;
29 // If a read has more than matchThreshold matches then it gets a histogram.
30 unsigned matchThreshold;
31 // Fraction of reads expect to have histograms for.
32 double frac;
33 // Count matches for each read
34 String<unsigned> hitCount;
35 // Map from read number to histogram id.
36 std::unordered_map<unsigned, unsigned> pairIdToHistogramId;
37 // Id manager for histogram.
38 IdManager<unsigned> idManager;
39 // Histograms.
40 String<String<unsigned> > histograms;
41 // Ids of the reads that are purged.
42 String<unsigned> purgedPairIds;
43 // The callback context object.
44 Holder<TCallback> callback;
45 // Read ID offset.
46 unsigned readOffset;
47 // The read sequences.
48 TReadSeqSet const & readSeqs;
49 // The options object.
50 RazerSCoreOptions<TOptionsSpec> const & options;
51
PairedMatchFilter(unsigned readCount_,unsigned matchThreshold_,double frac_,TCallback & callback_,unsigned readOffset_,TReadSeqSet const & readSeqs_,RazerSCoreOptions<TOptionsSpec> const & options_)52 PairedMatchFilter(unsigned readCount_, unsigned matchThreshold_, double frac_, TCallback & callback_, unsigned readOffset_, TReadSeqSet const & readSeqs_, RazerSCoreOptions<TOptionsSpec> const & options_) :
53 readCount(readCount_), matchThreshold(matchThreshold_), frac(frac_), callback(callback_), readOffset(readOffset_), readSeqs(readSeqs_), options(options_)
54 {
55 resize(hitCount, readCount, 0, Exact());
56 reserve(histograms, unsigned(frac * readCount));
57 }
58
59 };
60
61 // ============================================================================
62 // Metafunctions
63 // ============================================================================
64
65 // ============================================================================
66 // Functions
67 // ============================================================================
68
69 template <typename TOptionsSpec, typename TReadSeqSet, typename TCallback>
70 inline unsigned
_createHistogram(PairedMatchFilter<TOptionsSpec,TReadSeqSet,TCallback> & filter,unsigned pairId)71 _createHistogram(PairedMatchFilter<TOptionsSpec, TReadSeqSet, TCallback> & filter, unsigned pairId)
72 {
73 unsigned result = obtainId(filter.idManager);
74 if (result >= length(filter.histograms))
75 resize(filter.histograms, length(filter.histograms) + 1);
76 resize(filter.histograms[result], 1 + (int)(filter.options.errorRate * (length(filter.readSeqs[2 * pairId]) + length(filter.readSeqs[2 * pairId + 1]))), 0, Exact());
77 SEQAN_ASSERT_LT(result, length(filter.histograms));
78 return result;
79 }
80
81 template <typename TOptionsSpec, typename TReadSeqSet, typename TCallback>
82 inline void
registerRead(PairedMatchFilter<TOptionsSpec,TReadSeqSet,TCallback> & filter,unsigned pairId,int score)83 registerRead(PairedMatchFilter<TOptionsSpec, TReadSeqSet, TCallback> & filter, unsigned pairId, int score)
84 {
85 // std::cerr << "registering read " << pairId << std::endl;
86 if (filter.hitCount[pairId - filter.readOffset] == std::numeric_limits<unsigned>::max())
87 return;
88
89 if (++filter.hitCount[pairId - filter.readOffset] < filter.matchThreshold)
90 return;
91
92 // TODO(holtgrew): Maybe global read to histogram map; faster?
93
94 // Get histogram id, insert new histogram if necessary, exit if no histogram yet.
95 unsigned histogramId = 0;
96 if (filter.hitCount[pairId - filter.readOffset] == filter.matchThreshold)
97 {
98 // std::cerr << "new histogram for read " << pairId << std::endl;
99 histogramId = _createHistogram(filter, pairId);
100 filter.pairIdToHistogramId[pairId] = histogramId;
101 }
102 else
103 {
104 // std::cerr << "updating histogram for read " << pairId << std::endl;
105 typedef typename std::unordered_map<unsigned, unsigned>::iterator TIterator;
106 TIterator it = filter.pairIdToHistogramId.find(pairId);
107 SEQAN_ASSERT(it != filter.pairIdToHistogramId.end());
108 histogramId = it->second;
109 }
110
111 // Insert value into histogram.
112 _incrementCount(filter, histogramId, score);
113 }
114
115 template <typename TOptionsSpec, typename TReadSeqSet, typename TCallback>
116 inline bool
processRead(PairedMatchFilter<TOptionsSpec,TReadSeqSet,TCallback> & filter,unsigned pairId)117 processRead(PairedMatchFilter<TOptionsSpec, TReadSeqSet, TCallback> & filter, unsigned pairId)
118 {
119 typedef typename std::unordered_map<unsigned, unsigned>::iterator TIterator;
120
121 if (filter.hitCount[pairId - filter.readOffset] < filter.matchThreshold)
122 return false;
123
124 // std::cerr << "processing read " << pairId << std::endl;
125 // Get histogram id, insert new histogram if necessary, exit if no histogram yet.
126 TIterator it = filter.pairIdToHistogramId.find(pairId);
127 if (it == filter.pairIdToHistogramId.end())
128 return false; // Must have been disabled before.
129
130 unsigned histogramId = it->second;
131
132 // Perform actions.
133 int newLimit;
134
135 if (filter.options.scoreDistanceRange == 0)
136 {
137 newLimit = _newLimit(filter, histogramId);
138 if (newLimit == NO_NEW_LIMIT)
139 return false;
140
141 if (filter.options.purgeAmbiguous)
142 {
143 // std::cerr << "PURGED " << pairId << std::endl;
144 appendValue(filter.purgedPairIds, pairId);
145 newLimit = 0;
146 }
147 }
148 else
149 {
150 newLimit = _newLimitDistRange(filter, histogramId);
151 if (newLimit == CAN_BE_PURGED)
152 {
153 // std::cerr << "PURGED " << pairId << std::endl;
154 appendValue(filter.purgedPairIds, pairId);
155 newLimit = 0;
156 }
157 }
158
159 // std::cerr << "LIMITING " << pairId << "\t" << filter.histograms[histogramId][0] << "\t" << filter.hitCount[pairId - filter.readOffset] << "\t" << newLimit << std::endl;
160 FilterPatternLSetMaxErrorsWrapper<TCallback> wrapperL(value(filter.callback));
161 FilterPatternRSetMaxErrorsWrapper<TCallback> wrapperR(value(filter.callback));
162 setMaxErrors(wrapperL, pairId, newLimit - 1);
163 setMaxErrors(wrapperR, pairId, newLimit - 1);
164 if (filter.options.errorCutOff[2 * pairId] > newLimit)
165 filter.options.errorCutOff[2 * pairId] = newLimit;
166 if (filter.options.errorCutOff[2 * pairId + 1] > newLimit)
167 filter.options.errorCutOff[2 * pairId + 1] = newLimit;
168
169 if (newLimit == 0)
170 {
171 _freeHistogram(filter, histogramId);
172 filter.pairIdToHistogramId.erase(pairId);
173 filter.hitCount[pairId - filter.readOffset] = std::numeric_limits<unsigned>::max();
174 return true;
175 }
176 return false;
177 }
178
179 } // namespace seqan
180
181 #endif // #ifndef RAZERS_PAIRED_MATCH_FILTER_H_
182