1 /*
2 * CloseSweep.cpp
3 *
4 * Created on: Sep 25, 2014
5 * Author: nek3d
6 */
7
8 #include "CloseSweep.h"
9 #include "ContextClosest.h"
10
RecDistList(int maxSize)11 RecDistList::RecDistList(int maxSize)
12 : _kVal(maxSize),
13 _empty(true),
14 _currNumIdxs(0),
15 _totalRecs(0)
16
17 {
18
19 _allRecs.resize(_kVal);
20 for (int i=0; i < _kVal; i++) {
21 _allRecs[i] = new elemsType();
22 }
23 _distIndex = new indexType[_kVal];
24
25 clear();
26 }
27
~RecDistList()28 RecDistList::~RecDistList() {
29 for (int i=0; i < _kVal; i++) {
30 delete _allRecs[i];
31 }
32 delete _distIndex;
33 _distIndex = NULL;
34
35 }
36
clear()37 void RecDistList::clear() {
38 for (int i=0; i < _kVal; i++) {
39 _allRecs[i]->clear();
40 _allRecs[i]->reserve(16);
41 }
42 for (int i=0; i < _kVal; i++) {
43 _distIndex[i].first = -1;
44 _distIndex[i].second = -1;
45 }
46 _currNumIdxs = 0;
47 _empty = true;
48 _totalRecs = 0;
49 }
50
addRec(CHRPOS dist,Record * record,chromDirType chromDir)51 bool RecDistList::addRec(CHRPOS dist, Record *record, chromDirType chromDir) {
52 CHRPOS newPos = 0;
53 bool mustAppend = false;
54 int useElemIdx = 0;
55
56 if (dist > getMaxDist()) {
57 //dist is bigger than any currently contained.
58 if (_currNumIdxs == _kVal) {
59 return false;
60 }
61 mustAppend = true;
62 useElemIdx = _currNumIdxs;
63 newPos = _currNumIdxs;
64 }
65 if (find(dist, newPos)) {
66 _allRecs[_distIndex[newPos].second]->push_back(elemPairType(chromDir, record));
67 _totalRecs++;
68 return true;
69 }
70 if (!mustAppend) {
71 //smaller than maxDist, and doesn't currently exist. must insert
72 //newPos now is the insertion point.
73 CHRPOS startShiftPos = 0;
74 if (_currNumIdxs == _kVal) {
75 //already full. must remove oldest max.
76 //determine which vector it was using
77 //so we can re-use it.
78 startShiftPos = _kVal-1;
79 useElemIdx = _distIndex[startShiftPos].second;
80 } else {
81 //can add a new element
82 startShiftPos = _currNumIdxs;
83 useElemIdx = _currNumIdxs;
84 _currNumIdxs++;
85 }
86 for (CHRPOS i=startShiftPos; i > newPos; i--) {
87 _distIndex[i].first = _distIndex[i-1].first;
88 _distIndex[i].second = _distIndex[i-1].second;
89 }
90 } else {
91 _currNumIdxs++;
92 }
93 _allRecs[useElemIdx]->clear();
94 _allRecs[useElemIdx]->reserve(16);
95 _allRecs[useElemIdx]->push_back(elemPairType(chromDir, record));
96
97 _distIndex[newPos].first = (int)dist;
98 _distIndex[newPos].second = useElemIdx;
99 _empty = false;
100 _totalRecs++;
101 return true;
102 }
103
104 //if true, pos will be the idx the distance is at.
105 //if false, pos will be the idx to insert at.
find(CHRPOS dist,CHRPOS & pos) const106 bool RecDistList::find(CHRPOS dist, CHRPOS &pos) const {
107 CHRPOS lbound=0, ubound=_currNumIdxs-1, currVal =0;
108 pos = 0;
109 while(lbound <= ubound)
110 {
111 pos = (lbound + ubound) / 2;
112 currVal = _distIndex[pos].first;
113 if (currVal == dist) {
114 return true;
115 }
116 if (dist > currVal) {
117 lbound = pos + 1;
118 } else {
119 ubound = pos -1;
120 }
121 }
122 pos = (ubound == -1 ) ? 0 : (lbound == _currNumIdxs ? _currNumIdxs : lbound);
123 return false;
124 }
125
getMaxLeftEndPos() const126 CHRPOS RecDistList::getMaxLeftEndPos() const {
127
128 if (_empty) return -1;
129
130 CHRPOS maxDist =_distIndex[_currNumIdxs-1].first;
131
132 const elemsType *elems = _allRecs[_distIndex[_currNumIdxs-1].second];
133 for (int i=0; i < (int)elems->size(); i++) {
134 const elemPairType & elem = (*elems)[i];
135 if (elem.first == LEFT) {
136 return maxDist;
137 }
138 }
139 return -1;
140
141 }
142
143
CloseSweep(ContextClosest * context)144 CloseSweep::CloseSweep(ContextClosest *context)
145 : NewChromSweep(context),
146 _context(context),
147 _kClosest(_context->getNumClosestHitsWanted()),
148 _sameStrand(false),
149 _diffStrand(false),
150
151 _refDist(false),
152 _aDist(false),
153 _bDist(false),
154
155 _ignoreUpstream(false),
156 _ignoreDownstream(false),
157
158 _qForward(false),
159 _qReverse(false),
160 _dbForward(false),
161 _dbReverse(false),
162
163 _tieMode(ContextClosest::ALL_TIES),
164 _firstTie(false),
165 _lastTie(false),
166 _allTies(false)
167 {
168
169 _minUpstreamRecs.resize(_numDBs, NULL);
170 _minDownstreamRecs.resize(_numDBs, NULL);
171 _overlapRecs.resize(_numDBs, NULL);
172 _maxPrevLeftClosestEndPos.resize(_numDBs, 0);
173 _maxPrevLeftClosestEndPosReverse.resize(_numDBs, 0);
174
175 for (int i=0; i < _numDBs; i++) {
176 _minUpstreamRecs[i] = new RecDistList(_kClosest);
177 _minDownstreamRecs[i] = new RecDistList(_kClosest);
178 _overlapRecs[i] = new RecDistList(_kClosest);
179 }
180 }
181
~CloseSweep(void)182 CloseSweep::~CloseSweep(void) {
183 for (int i=0; i < _numDBs; i++) {
184 delete _minUpstreamRecs[i];
185 delete _minDownstreamRecs[i];
186 delete _overlapRecs[i];
187 }
188 }
189
init()190 bool CloseSweep::init() {
191
192 bool retVal = NewChromSweep::init();
193 _runToQueryEnd = true;
194
195 // Some abbreviations to make the code less miserable.
196 _sameStrand = _context->getSameStrand();
197 _diffStrand = _context->getDiffStrand();
198
199 _refDist = _context->getStrandedDistMode() == ContextClosest::REF_DIST;
200 _aDist = _context->getStrandedDistMode() == ContextClosest::A_DIST;
201 _bDist = _context->getStrandedDistMode() == ContextClosest::B_DIST;
202
203 _ignoreUpstream = _context->ignoreUpstream();
204 _ignoreDownstream = _context->ignoreDownstream();
205
206 _tieMode = _context->getTieMode();
207 _firstTie = _tieMode == ContextClosest::FIRST_TIE;
208 _lastTie = _tieMode == ContextClosest::LAST_TIE;
209 _allTies = _tieMode == ContextClosest::ALL_TIES;
210
211
212 return retVal;
213 }
214
masterScan(RecordKeyVector & retList)215 void CloseSweep::masterScan(RecordKeyVector &retList) {
216
217 _qForward = _currQueryRec->getStrandVal() == Record::FORWARD;
218 _qReverse = _currQueryRec->getStrandVal() == Record::REVERSE;
219
220 if (_currQueryChromName != _prevQueryChromName) testChromOrder(_currQueryRec);
221 if (_context->reportDistance()) {
222 _finalDistances.clear();
223 }
224
225 for (int i=0; i < _numDBs; i++) {
226
227 //first clear out everything from the previous scan
228 _minUpstreamRecs[i]->clear();
229 _minDownstreamRecs[i]->clear();
230 _overlapRecs[i]->clear();
231
232 if (dbFinished(i) || chromChange(i, retList, true)) {
233 continue;
234 } else {
235
236 // scan the database cache for hits
237 scanCache(i, retList);
238
239 // skip if we hit the end of the DB
240 // advance the db until we are ahead of the query. update hits and cache as necessary
241 bool stopScanning = false;
242 while (_currDbRecs[i] != NULL &&
243 _currQueryRec->sameChrom(_currDbRecs[i]) &&
244 !stopScanning) {
245 if (considerRecord(_currDbRecs[i], i, stopScanning) == DELETE) {
246 _dbFRMs[i]->deleteRecord(_currDbRecs[i]);
247 _currDbRecs[i] = NULL;
248 } else {
249 _caches[i].push_back(_currDbRecs[i]);
250 _currDbRecs[i] = NULL;
251 }
252 nextRecord(false, i);
253 }
254 }
255 finalizeSelections(i, retList);
256 }
257 checkMultiDbs(retList);
258 }
259
scanCache(int dbIdx,RecordKeyVector & retList)260 void CloseSweep::scanCache(int dbIdx, RecordKeyVector &retList) {
261 recListIterType cacheIter = _caches[dbIdx].begin();
262 while (cacheIter != _caches[dbIdx].end())
263 {
264 Record *cacheRec = cacheIter->value();
265 bool stopScanning = false;
266 if (considerRecord(cacheRec, dbIdx, stopScanning) == DELETE) {
267 cacheIter = _caches[dbIdx].deleteCurrent();
268 _dbFRMs[dbIdx]->deleteRecord(cacheRec);
269 } else {
270 cacheIter = _caches[dbIdx].next();
271 }
272 if (stopScanning) break;
273 }
274 }
275
276
considerRecord(Record * cacheRec,int dbIdx,bool & stopScanning)277 CloseSweep::rateOvlpType CloseSweep::considerRecord(Record *cacheRec, int dbIdx, bool &stopScanning) {
278
279 // Determine whether the hit and query intersect, and if so, what to do about it.
280 _dbForward = cacheRec->getStrandVal() == Record::FORWARD;
281 _dbReverse = cacheRec->getStrandVal() == Record::REVERSE;
282 CHRPOS currDist = 0;
283
284 if (intersects(_currQueryRec, cacheRec)) {
285
286 // HIT INTERSECTS QUERY
287 return tryToAddRecord(cacheRec, 0, dbIdx, stopScanning, OVERLAP, INTERSECT);
288
289 } else if (cacheRec->after(_currQueryRec)) {
290
291 // HIT IS TO THE RIGHT OF THE QUERY.
292
293 currDist = (cacheRec->getStartPos() - _currQueryRec->getEndPos()) + 1;
294 if (_context->signDistance()) {
295 if ((_aDist && _qReverse) ||
296 (_bDist && _dbForward))
297 {
298 // hit is "upstream" of A
299 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, RIGHT, UPSTREAM);
300 }
301 }
302 // HIT IS DOWNSTREAM.
303 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, RIGHT, DOWNSTREAM);
304 } else if (_currQueryRec->after(cacheRec)){
305
306 // HIT IS TO THE LEFT OF THE QUERY.
307 currDist = (_currQueryRec->getStartPos() - cacheRec->getEndPos()) + 1;
308 if (_context->signDistance()) {
309 if ((_aDist && _qReverse) ||
310 (_bDist && _dbForward))
311 {
312 // HIT IS DOWNSTREAM.
313 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, LEFT, DOWNSTREAM);
314 }
315 }
316 // hit is "UPSTREAM" of A
317 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, LEFT, UPSTREAM);
318 }
319 return IGNORE;
320 }
321
finalizeSelections(int dbIdx,RecordKeyVector & retList)322 void CloseSweep::finalizeSelections(int dbIdx, RecordKeyVector &retList) {
323
324 // Determine whether the overlaps, upstream, or downstream records have
325 // the k closest hits.
326
327 // The first thing to do is set the leftmost end pos used by records on the left.
328 // This will control when the cache is purged during the next query's sweep.
329 setLeftClosestEndPos(dbIdx);
330
331
332
333 RecDistList *upRecs = _minUpstreamRecs[dbIdx];
334 RecDistList *downRecs = _minDownstreamRecs[dbIdx];
335 RecDistList *overlaps = _overlapRecs[dbIdx];
336 RecDistList::constIterType upIter = upRecs->begin();
337 RecDistList::constIterType downIter = downRecs->begin();
338 CHRPOS upDist = INT_MAX;
339 CHRPOS downDist = INT_MAX;
340
341 int totalHitsUsed = 0;
342
343
344 // If forcing upstream, use those first.
345 if (_context->forceUpstream()) {
346 //add upstream recs until all are used or K hits taken.
347 while (upIter != upRecs->end() && totalHitsUsed < _kClosest) {
348 upDist = upRecs->currDist(upIter);
349 totalHitsUsed += addRecsToRetList(upRecs->allElems(upIter), 0 - upDist, retList);
350 upIter++;
351 }
352 }
353
354 // If forcing downstream, use those first/next.
355 if (_context->forceDownstream()) {
356 while (downIter != downRecs->end() && totalHitsUsed < _kClosest) {
357 downDist = downRecs->currDist(downIter);
358 totalHitsUsed += addRecsToRetList(downRecs->allElems(downIter), downDist, retList);
359 downIter++;
360 }
361 }
362
363
364 //start with the overlaps, which will all have distance zero.
365 if (totalHitsUsed < _kClosest && !overlaps->empty()) {
366 //there are overlaps.
367 totalHitsUsed += addRecsToRetList(overlaps->allElems(overlaps->begin()), 0, retList);
368 }
369
370 //now check the upstream and downstream recs, starting with the beginning of each,
371 //and grabbing whichever has the closest records as we work our way out to the
372 //max dist. Continue until we have K records, or the reclists are empty.
373
374 while (totalHitsUsed < _kClosest) {
375 upDist = INT_MAX;
376 downDist = INT_MAX;
377
378 if (upIter != upRecs->end()) {
379 upDist = upRecs->currDist(upIter);
380 }
381 if (downIter != downRecs->end()) {
382 downDist = downRecs->currDist(downIter);
383 }
384
385 //stop if no hits are left to consider
386 if ((upDist == INT_MAX) && (downDist == INT_MAX)) break;
387
388 bool tie = upDist == downDist;
389 bool usedUp = false;
390 bool usedDown = false;
391 if (upDist < downDist || (tie && !_lastTie)) {
392 totalHitsUsed += addRecsToRetList(upRecs->allElems(upIter), 0 - upDist, retList);
393 upIter++;
394 usedUp = true;
395 }
396 if (downDist < upDist || (tie && !_firstTie)) {
397 totalHitsUsed += addRecsToRetList(downRecs->allElems(downIter), downDist, retList);
398 downIter++;
399 usedDown = true;
400 }
401 if (tie) {
402 // If there was a tie, but we didn't use both elements because of the tie mode,
403 // then we still have to increment the iterator of the unused element so it
404 // isn't used later.
405 if (usedUp && !usedDown) {
406 downIter++;
407 } else if (usedDown && !usedUp) {
408 upIter++;
409 }
410 }
411 }
412
413 }
414
415
416
addRecsToRetList(RecDistList::elemsType * recs,CHRPOS currDist,RecordKeyVector & retList)417 int CloseSweep::addRecsToRetList(RecDistList::elemsType *recs, CHRPOS currDist, RecordKeyVector &retList) {
418
419 int hitsUsed = 0;
420 int numRecs = (int)recs->size(); //just to clean the code some.
421
422
423 if (_firstTie) {
424 addSingleRec(recs->at(0).second, currDist, hitsUsed, retList);
425 return 1;
426
427 } else if (_lastTie) {
428 addSingleRec(recs->at(numRecs-1).second, currDist, hitsUsed, retList);
429 return 1;
430
431 } else { //tieMode == ALL_TIES
432
433 for (int i=0; i < numRecs; i++) {
434 addSingleRec(recs->at(i).second, currDist, hitsUsed, retList);
435 }
436 return numRecs;
437 }
438 }
439
addSingleRec(Record * rec,CHRPOS currDist,int & hitsUsed,RecordKeyVector & retList)440 void CloseSweep::addSingleRec(Record *rec, CHRPOS currDist, int &hitsUsed, RecordKeyVector &retList) {
441 retList.push_back(rec);
442 _finalDistances.push_back(currDist);
443 hitsUsed++;
444 }
445
checkMultiDbs(RecordKeyVector & retList)446 void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
447 // //can skip this method if there's only one DB, or if we are
448 // //resolving closest hits for each db instead of all of them
449 if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs == 1) return;
450
451
452 // Get the K closest hits among multiple databases,
453 // while not counting ties more than once if the tieMode
454 // is "first" or "last".
455 // Start by entering all hits and their absolute distances
456 // into a vector of distance tuples, then sort it.
457
458 vector<distanceTuple> copyDists;
459 int numHits = (int)retList.size();
460 copyDists.resize(numHits);
461 CHRPOS i=0;
462 for (RecordKeyVector::iterator_type iter = retList.begin(); iter != retList.end(); iter++) {
463 CHRPOS dist = _finalDistances[i];
464 copyDists[i]._dist = abs(dist);
465 copyDists[i]._rec = *iter;
466 copyDists[i]._isNeg = dist < 0;
467 i++;
468 }
469
470 // sort the hits by distance
471 sort(copyDists.begin(), copyDists.end(), DistanceTupleSortAscFunctor());
472
473 //now we want to build a map telling us what distances are tied,
474 //and how many of each of these there are. Use a map<int, int>,
475 //where the key is a distance (in absolute value) and the value
476 //is the number of ties that that distance has.
477 map<CHRPOS, CHRPOS> ties;
478 for (vector<distanceTuple>::iterator i = copyDists.begin(); i != copyDists.end(); ++i)
479 ++ties[i->_dist];
480
481 // Clear the original list and distances, and re-populate
482 // until we have the desired number of hits, skipping
483 // over any unwanted ties.
484 retList.clearVector();
485 _finalDistances.clear();
486
487 int hitsUsed = 0;
488 for (i=0; i < numHits && hitsUsed < _kClosest; i++) {
489 CHRPOS dist = copyDists[i]._dist;
490 bool isNeg = copyDists[i]._isNeg;
491 //see if this distance is tied with any other
492 map<CHRPOS, CHRPOS>::iterator iter = ties.find(dist);
493 if (iter != ties.end()) {
494 //tie was found
495 CHRPOS numTies = iter->second;
496 if (!_allTies) {
497 if (_firstTie) {
498 //just add the first of the ties
499 addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
500 i += numTies - 1; // use first, then skip ahead by the number of ties, minus 1 because
501 //loop is about to be incremented
502 } else { //tieMode == LAST_TIE. Just add the last of the ties.
503 i += numTies -1;
504 dist = copyDists[i]._dist;
505 isNeg = copyDists[i]._isNeg;
506 addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
507 }
508 } else {
509 // tieMode is ALL_TIES, use all hits.
510 for (int j = i; j < i + numTies; j++) {
511 dist = copyDists[j]._dist;
512 isNeg = copyDists[j]._isNeg;
513 addSingleRec(copyDists[j]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
514 }
515 i += numTies - 1; //skip ahead by the number of ties, minus 1 because
516 //loop is about to be incremented
517 }
518 } else {
519 addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
520 }
521 }
522 }
523
chromChange(int dbIdx,RecordKeyVector & retList,bool wantScan)524 bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
525 {
526 Record *dbRec = _currDbRecs[dbIdx];
527
528 bool haveQuery = _currQueryRec != NULL;
529 bool haveDB = dbRec != NULL;
530
531 if (haveQuery && _currQueryChromName != _prevQueryChromName) {
532 _context->testNameConventions(_currQueryRec);
533 testChromOrder(_currQueryRec);
534 }
535
536 if (haveDB) {
537 _context->testNameConventions(dbRec);
538 testChromOrder(dbRec);
539 }
540
541 // the files are on the same chrom
542 if (haveQuery && (!haveDB || _currQueryRec->sameChrom(dbRec))) {
543
544 //if this is the first time the query's chrom is ahead of the chrom that was in this cache,
545 //then we have to clear the cache.
546 if (!_caches[dbIdx].empty() && queryChromAfterDbRec(_caches[dbIdx].begin()->value())) {
547 clearCache(dbIdx);
548 clearClosestEndPos(dbIdx);
549 }
550 return false;
551 }
552
553 if (!haveQuery || !haveDB) return false;
554
555 if (!_caches[dbIdx].empty() && (_caches[dbIdx].begin()->value()->sameChrom(_currQueryRec))) {
556 //the newest DB record's chrom is ahead of the query, but the cache still
557 //has old records on that query's chrom
558 scanCache(dbIdx, retList);
559 finalizeSelections(dbIdx, retList);
560 return true;
561 }
562
563
564 // the query is ahead of the database. fast-forward the database to catch-up.
565 if (queryChromAfterDbRec(dbRec)) {
566 string oldDbChrom(dbRec->getChrName());
567
568 while (dbRec != NULL &&
569 queryChromAfterDbRec(dbRec)) {
570 _dbFRMs[dbIdx]->deleteRecord(dbRec);
571 if (!nextRecord(false, dbIdx)) break;
572 dbRec = _currDbRecs[dbIdx];
573 const string &newDbChrom = dbRec->getChrName();
574 if (newDbChrom != oldDbChrom) {
575 testChromOrder(dbRec);
576 oldDbChrom = newDbChrom;
577 }
578 }
579 clearCache(dbIdx);
580 clearClosestEndPos(dbIdx);
581 return false;
582 }
583 // the database is ahead of the query.
584 else {
585 // 1. scan the cache for remaining hits on the query's current chrom.
586 if (wantScan) scanCache(dbIdx, retList);
587
588 return true;
589 }
590
591 //control can't reach here, but compiler still wants a return statement.
592 return true;
593 }
594
595
setLeftClosestEndPos(int dbIdx)596 void CloseSweep::setLeftClosestEndPos(int dbIdx)
597 {
598
599 //try to determine max end pos of hits to left of query.
600 //first check for exceptions due to options that cause
601 //complete rejection of all left side hits.
602
603 //no records found to left of query,
604 //can't set purge point, except for some special cases.
605 purgeDirectionType purgeDir = purgePointException();
606 if (purgeDir == BOTH || purgeDir == FORWARD_ONLY) {
607 _maxPrevLeftClosestEndPos[dbIdx] = max(_currQueryRec->getStartPos(), _maxPrevLeftClosestEndPos[dbIdx]);
608 }
609 if (purgeDir == BOTH || purgeDir == REVERSE_ONLY) {
610 _maxPrevLeftClosestEndPosReverse[dbIdx] = max(_currQueryRec->getStartPos(), _maxPrevLeftClosestEndPosReverse[dbIdx]);
611 }
612 if (purgeDir != NEITHER) return;
613
614 RecDistList *upRecs = _minUpstreamRecs[dbIdx];
615 RecDistList *downRecs = _minDownstreamRecs[dbIdx];
616
617 CHRPOS upDist = upRecs->getMaxLeftEndPos();
618 CHRPOS downDist = downRecs->getMaxLeftEndPos();
619
620
621 if (upDist == -1 && downDist == -1) return;
622
623 CHRPOS leftMostEndPos = (_currQueryRec->getStartPos() - max(upDist, downDist)) +1;
624 if ((!_sameStrand && !_diffStrand) ||
625 (_sameStrand && _qForward) ||
626 (_diffStrand && _qReverse)) {
627
628 _maxPrevLeftClosestEndPos[dbIdx] = max(leftMostEndPos, _maxPrevLeftClosestEndPos[dbIdx]);
629 } else {
630 _maxPrevLeftClosestEndPosReverse[dbIdx] = max(leftMostEndPos, _maxPrevLeftClosestEndPosReverse[dbIdx]);
631 }
632 }
633
beforeLeftClosestEndPos(int dbIdx,Record * rec)634 bool CloseSweep::beforeLeftClosestEndPos(int dbIdx, Record *rec)
635 {
636 CHRPOS recEndPos = rec->getEndPos();
637 CHRPOS prevPos = _maxPrevLeftClosestEndPos[dbIdx];
638 CHRPOS prevPosReverse = _maxPrevLeftClosestEndPosReverse[dbIdx];
639
640 if (!_sameStrand && !_diffStrand) {
641 return recEndPos < prevPos;
642 } else {
643 if (rec->getStrandVal() == Record::FORWARD) {
644 return recEndPos < prevPos;
645 } else {
646 return recEndPos < prevPosReverse;
647 }
648 }
649 return false;
650 }
651
clearClosestEndPos(int dbIdx)652 void CloseSweep::clearClosestEndPos(int dbIdx)
653 {
654 _maxPrevLeftClosestEndPos[dbIdx] = 0;
655 _maxPrevLeftClosestEndPosReverse[dbIdx] = 0;
656 }
657
tryToAddRecord(Record * cacheRec,CHRPOS dist,int dbIdx,bool & stopScanning,chromDirType chromDir,streamDirType streamDir)658 CloseSweep::rateOvlpType CloseSweep::tryToAddRecord(Record *cacheRec, CHRPOS dist, int dbIdx, bool &stopScanning, chromDirType chromDir, streamDirType streamDir) {
659
660 //
661 // Decide whether to ignore hit
662 //
663 // If want same strand, and they're unknown or not the same, ignore
664 // If we want diff strand, and they're unknown or different, ignore.
665 // If we want diff names and they're the same, ignore
666 // If stream is unwanted, ignore.
667 bool hasUnknownStrands = (_currQueryRec->getStrandVal() == Record::UNKNOWN || cacheRec->getStrandVal() == Record::UNKNOWN);
668 bool wantedSameNotSame = (_sameStrand && (hasUnknownStrands || (_currQueryRec->getStrandVal() != cacheRec->getStrandVal())));
669 bool wantedDiffNotDiff = (_diffStrand && (hasUnknownStrands || (_currQueryRec->getStrandVal() == cacheRec->getStrandVal())));
670 bool badStrand = wantedSameNotSame || wantedDiffNotDiff;
671 bool badNames = (_context->diffNames() && (cacheRec->getName() == _currQueryRec->getName()));
672 bool badStream = (streamDir == UPSTREAM ? _ignoreUpstream : (streamDir == DOWNSTREAM ? _ignoreDownstream : _context->ignoreOverlaps()));
673
674 bool shouldIgnore = badStrand || badNames || badStream;
675
676
677 // You would think ignoring it means we could stop here, but even then,
678 // hits on the left may need to be purged from the cache,
679 // and hits on the right tell us when to stop scanning the cache.
680
681
682 //establish which set of hits we are looking at.
683 RecDistList *useList = (streamDir == UPSTREAM ? _minUpstreamRecs[dbIdx] : (streamDir == INTERSECT ? _overlapRecs[dbIdx] : _minDownstreamRecs[dbIdx]));
684
685
686 if (chromDir == OVERLAP && !shouldIgnore) {
687 useList->addRec(0, cacheRec, RecDistList::OVERLAP);
688 }
689
690 else if (chromDir == LEFT) {
691 if (beforeLeftClosestEndPos(dbIdx, cacheRec)) {
692 return DELETE;
693 }
694 if (!shouldIgnore) {
695 useList->addRec(dist, cacheRec, RecDistList::LEFT);
696 }
697 }
698
699 else if (chromDir == RIGHT) {
700 //hit is to the right of query. Need to know when to stop scanning.
701 // if NOT ignoring:
702 // if we're able to add it to the useList, definitely DON'T stop,
703 // hit was valid, so next one could be too.
704 // if we're UNABLE to add it to the uselist, definitely DO stop,
705 // because useList is full.
706 // but if we ARE ignoring:
707 // can only stop in the rare cases where hits to the right
708 // are ALWAYS getting ignored, regardless of query and cache strand
709 // Otherwise, always continue.
710 if (!shouldIgnore) {
711 if (!useList->addRec(dist, cacheRec, RecDistList::RIGHT)) {
712 stopScanning = true;
713 }
714 } else { // hit is ignored
715 if (allHitsRightOfQueryIgnored()) {
716 stopScanning = true;
717 }
718 }
719 }
720 return IGNORE;
721 }
722
allHitsRightOfQueryIgnored()723 bool CloseSweep::allHitsRightOfQueryIgnored() {
724 return ((_refDist && _ignoreDownstream) ||
725 (_aDist && ((_ignoreUpstream && _qReverse) || (_ignoreDownstream && _qForward))) ||
726 (_bDist && ((_ignoreDownstream && ((_qForward && _diffStrand) || (_qReverse && _sameStrand))) ||
727 ((_ignoreUpstream && ((_qReverse && _diffStrand) || (_qForward && _sameStrand)))))));
728 }
729
730
purgePointException()731 CloseSweep::purgeDirectionType CloseSweep::purgePointException() {
732
733 // Normally, we can't set a cache purge point if there are no
734 // records to the left of the query.
735
736 // This method will detect use cases that cause all of the left side
737 // hits to have been rejected, and tell us whether to purge the forward
738 // cache, reverse cache, both, or neither.
739
740 purgeDirectionType purgeDir = NEITHER;
741
742 if (_ignoreUpstream && _ignoreDownstream) {
743 purgeDir = BOTH;
744 }
745 else if (_ignoreUpstream) {
746 if (_refDist) {
747 purgeDir = BOTH;
748 }
749 else if (_aDist && _qForward) {
750 if (_sameStrand) {
751 purgeDir = FORWARD_ONLY;
752 }
753 else if (_diffStrand) {
754 purgeDir = REVERSE_ONLY;
755 }
756 }
757 else if (_bDist) {
758 if (_qForward && _diffStrand) purgeDir = REVERSE_ONLY;
759 else if (_qReverse && _sameStrand) purgeDir = REVERSE_ONLY;
760 }
761 }
762 else if (_ignoreDownstream) {
763 // if refDist, do nothing. left hits can't be downstream.
764 if (_aDist) {
765 //if qForward, do nothing. left hits can't be downstream.
766 if (_qReverse) {
767 if (_sameStrand) purgeDir = REVERSE_ONLY;
768 else if (_diffStrand) purgeDir = FORWARD_ONLY;
769 }
770 } else if (_bDist) {
771 if (_qForward && _sameStrand) purgeDir = FORWARD_ONLY;
772 else if (_qReverse && _diffStrand) purgeDir = FORWARD_ONLY;
773 }
774 }
775
776 return purgeDir;
777
778 }
779