1 /*
2  * CloseSweep.cpp
3  *
4  *  Created on: Sep 25, 2014
5  *      Author: nek3d
6  */
7 
8 #include "CloseSweep.h"
9 #include "ContextClosest.h"
10 
RecDistList(int maxSize)11 RecDistList::RecDistList(int maxSize)
12 :  _kVal(maxSize),
13    _empty(true),
14    _currNumIdxs(0),
15    _totalRecs(0)
16 
17     {
18 
19 	_allRecs.resize(_kVal);
20 	for (int i=0; i < _kVal; i++) {
21 		_allRecs[i] = new elemsType();
22 	}
23 	_distIndex = new indexType[_kVal];
24 
25 	clear();
26 }
27 
~RecDistList()28 RecDistList::~RecDistList() {
29 	for (int i=0; i < _kVal; i++) {
30 		delete _allRecs[i];
31 	}
32 	delete _distIndex;
33 	_distIndex = NULL;
34 
35 }
36 
clear()37 void RecDistList::clear() {
38 	for (int i=0; i < _kVal; i++) {
39 		_allRecs[i]->clear();
40 		_allRecs[i]->reserve(16);
41 	}
42 	for (int i=0; i < _kVal; i++) {
43 		_distIndex[i].first = -1;
44 		_distIndex[i].second = -1;
45 	}
46 	_currNumIdxs = 0;
47 	_empty = true;
48 	_totalRecs = 0;
49 }
50 
addRec(CHRPOS dist,Record * record,chromDirType chromDir)51 bool RecDistList::addRec(CHRPOS dist, Record *record, chromDirType chromDir) {
52 	CHRPOS newPos = 0;
53 	bool mustAppend = false;
54 	int useElemIdx = 0;
55 
56 	if (dist > getMaxDist()) {
57 		//dist is bigger than any currently contained.
58 		if (_currNumIdxs == _kVal) {
59 			return false;
60 		}
61 		mustAppend = true;
62 		useElemIdx = _currNumIdxs;
63 		newPos = _currNumIdxs;
64 	}
65 	if (find(dist, newPos)) {
66 		_allRecs[_distIndex[newPos].second]->push_back(elemPairType(chromDir, record));
67 		_totalRecs++;
68 		return true;
69 	}
70 	if (!mustAppend) {
71 		//smaller than maxDist, and doesn't currently exist. must insert
72 		//newPos now is the insertion point.
73 		CHRPOS startShiftPos = 0;
74 		if (_currNumIdxs == _kVal) {
75 			//already full. must remove oldest max.
76 			//determine which vector it was using
77 			//so we can re-use it.
78 			startShiftPos = _kVal-1;
79 			useElemIdx = _distIndex[startShiftPos].second;
80 		} else {
81 			//can add a new element
82 			startShiftPos = _currNumIdxs;
83 			useElemIdx = _currNumIdxs;
84 			_currNumIdxs++;
85 		}
86 		for (CHRPOS i=startShiftPos; i > newPos; i--) {
87 			_distIndex[i].first = _distIndex[i-1].first;
88 			_distIndex[i].second = _distIndex[i-1].second;
89 		}
90 	} else {
91 		_currNumIdxs++;
92 	}
93 	_allRecs[useElemIdx]->clear();
94 	_allRecs[useElemIdx]->reserve(16);
95 	_allRecs[useElemIdx]->push_back(elemPairType(chromDir, record));
96 
97 	_distIndex[newPos].first = (int)dist;
98 	_distIndex[newPos].second = useElemIdx;
99 	_empty = false;
100 	_totalRecs++;
101 	return true;
102 }
103 
104 //if true, pos will be the idx the distance is at.
105 //if false, pos will be the idx to insert at.
find(CHRPOS dist,CHRPOS & pos) const106 bool RecDistList::find(CHRPOS dist, CHRPOS &pos) const {
107 	CHRPOS lbound=0, ubound=_currNumIdxs-1, currVal =0;
108 	pos = 0;
109 	while(lbound <= ubound)
110 	{
111 		pos = (lbound + ubound) / 2;
112 		currVal = _distIndex[pos].first;
113 		if (currVal == dist) {
114 			return true;
115 		}
116 		if (dist > currVal) {
117 			lbound = pos + 1;
118 		} else {
119 			ubound = pos -1;
120 		}
121 	}
122 	pos = (ubound == -1 ) ? 0 : (lbound == _currNumIdxs ? _currNumIdxs : lbound);
123 	return false;
124 }
125 
getMaxLeftEndPos() const126 CHRPOS RecDistList::getMaxLeftEndPos() const {
127 
128 	if (_empty) return -1;
129 
130 	CHRPOS maxDist =_distIndex[_currNumIdxs-1].first;
131 
132   const elemsType *elems = _allRecs[_distIndex[_currNumIdxs-1].second];
133 	for (int i=0; i < (int)elems->size(); i++) {
134     const elemPairType & elem = (*elems)[i];
135 		if (elem.first == LEFT) {
136 			return maxDist;
137 		}
138 	}
139 	return -1;
140 
141 }
142 
143 
CloseSweep(ContextClosest * context)144 CloseSweep::CloseSweep(ContextClosest *context)
145 :	NewChromSweep(context),
146  	_context(context),
147  	_kClosest(_context->getNumClosestHitsWanted()),
148 	_sameStrand(false),
149 	_diffStrand(false),
150 
151 	_refDist(false),
152 	_aDist(false),
153 	_bDist(false),
154 
155 	_ignoreUpstream(false),
156 	_ignoreDownstream(false),
157 
158 	_qForward(false),
159 	_qReverse(false),
160 	_dbForward(false),
161 	_dbReverse(false),
162 
163 	_tieMode(ContextClosest::ALL_TIES),
164 	_firstTie(false),
165 	_lastTie(false),
166 	_allTies(false)
167  	{
168 
169 	_minUpstreamRecs.resize(_numDBs, NULL);
170 	_minDownstreamRecs.resize(_numDBs, NULL);
171 	_overlapRecs.resize(_numDBs, NULL);
172 	_maxPrevLeftClosestEndPos.resize(_numDBs, 0);
173 	_maxPrevLeftClosestEndPosReverse.resize(_numDBs, 0);
174 
175 	for (int i=0; i < _numDBs; i++) {
176 		_minUpstreamRecs[i] = new RecDistList(_kClosest);
177 		_minDownstreamRecs[i] = new RecDistList(_kClosest);
178 		_overlapRecs[i] = new RecDistList(_kClosest);
179 	}
180 }
181 
~CloseSweep(void)182 CloseSweep::~CloseSweep(void) {
183 	for (int i=0; i < _numDBs; i++) {
184 		delete _minUpstreamRecs[i];
185 		delete _minDownstreamRecs[i];
186 		delete _overlapRecs[i];
187 	}
188 }
189 
init()190 bool CloseSweep::init() {
191 
192     bool retVal =  NewChromSweep::init();
193     _runToQueryEnd = true;
194 
195 	// Some abbreviations to make the code less miserable.
196 	_sameStrand = _context->getSameStrand();
197 	_diffStrand = _context->getDiffStrand();
198 
199 	_refDist = _context->getStrandedDistMode() == ContextClosest::REF_DIST;
200 	_aDist = _context->getStrandedDistMode() == ContextClosest::A_DIST;
201 	_bDist = _context->getStrandedDistMode() == ContextClosest::B_DIST;
202 
203 	_ignoreUpstream = _context->ignoreUpstream();
204 	_ignoreDownstream = _context->ignoreDownstream();
205 
206 	_tieMode = _context->getTieMode();
207 	_firstTie = _tieMode == ContextClosest::FIRST_TIE;
208 	_lastTie = _tieMode == ContextClosest::LAST_TIE;
209 	_allTies = _tieMode == ContextClosest::ALL_TIES;
210 
211 
212     return retVal;
213  }
214 
masterScan(RecordKeyVector & retList)215 void CloseSweep::masterScan(RecordKeyVector &retList) {
216 
217 	_qForward = _currQueryRec->getStrandVal() == Record::FORWARD;
218 	_qReverse = _currQueryRec->getStrandVal() == Record::REVERSE;
219 
220 	if (_currQueryChromName != _prevQueryChromName) testChromOrder(_currQueryRec);
221 	if (_context->reportDistance()) {
222 		_finalDistances.clear();
223 	}
224 
225 	for (int i=0; i < _numDBs; i++) {
226 
227 		//first clear out everything from the previous scan
228 		_minUpstreamRecs[i]->clear();
229 		_minDownstreamRecs[i]->clear();
230 		_overlapRecs[i]->clear();
231 
232 		if (dbFinished(i) || chromChange(i, retList, true)) {
233 			continue;
234 		} else {
235 
236 			// scan the database cache for hits
237 			scanCache(i, retList);
238 
239 			// skip if we hit the end of the DB
240 			// advance the db until we are ahead of the query. update hits and cache as necessary
241 			bool stopScanning = false;
242 			while (_currDbRecs[i] != NULL &&
243 					_currQueryRec->sameChrom(_currDbRecs[i]) &&
244 					!stopScanning) {
245 				if (considerRecord(_currDbRecs[i], i, stopScanning) == DELETE) {
246 					_dbFRMs[i]->deleteRecord(_currDbRecs[i]);
247 					_currDbRecs[i] = NULL;
248 				} else {
249 					_caches[i].push_back(_currDbRecs[i]);
250 					_currDbRecs[i] = NULL;
251 				}
252 				nextRecord(false, i);
253 			}
254 		}
255 		finalizeSelections(i, retList);
256 	}
257 	checkMultiDbs(retList);
258 }
259 
scanCache(int dbIdx,RecordKeyVector & retList)260 void CloseSweep::scanCache(int dbIdx, RecordKeyVector &retList) {
261 	recListIterType cacheIter = _caches[dbIdx].begin();
262     while (cacheIter != _caches[dbIdx].end())
263     {
264         Record *cacheRec = cacheIter->value();
265     	bool stopScanning = false;
266     	if (considerRecord(cacheRec, dbIdx, stopScanning) == DELETE) {
267             cacheIter = _caches[dbIdx].deleteCurrent();
268     		_dbFRMs[dbIdx]->deleteRecord(cacheRec);
269     	} else {
270             cacheIter = _caches[dbIdx].next();
271     	}
272     	if (stopScanning) break;
273     }
274 }
275 
276 
considerRecord(Record * cacheRec,int dbIdx,bool & stopScanning)277 CloseSweep::rateOvlpType CloseSweep::considerRecord(Record *cacheRec, int dbIdx, bool &stopScanning) {
278 
279 	// Determine whether the hit and query intersect, and if so, what to do about it.
280 	_dbForward = cacheRec->getStrandVal() == Record::FORWARD;
281 	_dbReverse = cacheRec->getStrandVal() == Record::REVERSE;
282 	CHRPOS currDist = 0;
283 
284 	if (intersects(_currQueryRec, cacheRec)) {
285 
286 		// HIT INTERSECTS QUERY
287 		return tryToAddRecord(cacheRec, 0, dbIdx, stopScanning, OVERLAP, INTERSECT);
288 
289 	} else if (cacheRec->after(_currQueryRec)) {
290 
291 		// HIT IS TO THE RIGHT OF THE QUERY.
292 
293 		 currDist = (cacheRec->getStartPos() - _currQueryRec->getEndPos()) + 1;
294 		 if (_context->signDistance()) {
295 			 if ((_aDist && _qReverse) ||
296 				 (_bDist && _dbForward))
297 				 {
298 					 // hit is "upstream" of A
299 					 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, RIGHT, UPSTREAM);
300 				 }
301 		 }
302 		 // HIT IS DOWNSTREAM.
303 		 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, RIGHT, DOWNSTREAM);
304 	 } else if (_currQueryRec->after(cacheRec)){
305 
306 		 // HIT IS TO THE LEFT OF THE QUERY.
307 		 currDist = (_currQueryRec->getStartPos() - cacheRec->getEndPos()) + 1;
308 		 if (_context->signDistance()) {
309 			 if ((_aDist && _qReverse) ||
310 				 (_bDist && _dbForward))
311 			 {
312 				 // HIT IS DOWNSTREAM.
313 				 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, LEFT, DOWNSTREAM);
314 			 }
315 		 }
316 		 // hit is "UPSTREAM" of A
317 		 return tryToAddRecord(cacheRec, abs(currDist), dbIdx, stopScanning, LEFT, UPSTREAM);
318 	 }
319 	return IGNORE;
320 }
321 
finalizeSelections(int dbIdx,RecordKeyVector & retList)322 void CloseSweep::finalizeSelections(int dbIdx, RecordKeyVector &retList) {
323 
324 	// Determine whether the overlaps, upstream, or downstream records have
325 	// the k closest hits.
326 
327 	// The first thing to do is set the leftmost end pos used by records on the left.
328 	// This will control when the cache is purged during the next query's sweep.
329 	setLeftClosestEndPos(dbIdx);
330 
331 
332 
333 	RecDistList *upRecs = _minUpstreamRecs[dbIdx];
334 	RecDistList *downRecs = _minDownstreamRecs[dbIdx];
335 	RecDistList *overlaps = _overlapRecs[dbIdx];
336 	RecDistList::constIterType upIter = upRecs->begin();
337 	RecDistList::constIterType downIter = downRecs->begin();
338 	CHRPOS upDist = INT_MAX;
339 	CHRPOS downDist = INT_MAX;
340 
341 	int totalHitsUsed = 0;
342 
343 
344 	// If forcing upstream, use those first.
345 	if (_context->forceUpstream()) {
346 		//add upstream recs until all are used or K hits taken.
347 		while (upIter != upRecs->end() && totalHitsUsed < _kClosest) {
348 			upDist = upRecs->currDist(upIter);
349 			totalHitsUsed += addRecsToRetList(upRecs->allElems(upIter), 0 - upDist, retList);
350 			upIter++;
351 		}
352 	}
353 
354 	// If forcing downstream, use those first/next.
355 	if (_context->forceDownstream()) {
356 		while (downIter != downRecs->end() && totalHitsUsed < _kClosest) {
357 			downDist = downRecs->currDist(downIter);
358 			totalHitsUsed += addRecsToRetList(downRecs->allElems(downIter), downDist, retList);
359 			downIter++;
360 		}
361 	}
362 
363 
364 	//start with the overlaps, which will all have distance zero.
365 	if (totalHitsUsed < _kClosest && !overlaps->empty()) {
366 		//there are overlaps.
367 		totalHitsUsed += addRecsToRetList(overlaps->allElems(overlaps->begin()), 0, retList);
368 	}
369 
370 	//now check the upstream and downstream recs, starting with the beginning of each,
371 	//and grabbing whichever has the closest records as we work our way out to the
372 	//max dist. Continue until we have K records, or the reclists are empty.
373 
374 	while (totalHitsUsed < _kClosest) {
375 		upDist = INT_MAX;
376 		downDist = INT_MAX;
377 
378 		if (upIter != upRecs->end()) {
379 			upDist = upRecs->currDist(upIter);
380 		}
381 		if (downIter != downRecs->end()) {
382 			downDist = downRecs->currDist(downIter);
383 		}
384 
385 		//stop if no hits are left to consider
386 		if ((upDist == INT_MAX) && (downDist == INT_MAX)) break;
387 
388 		bool tie = upDist == downDist;
389 		bool usedUp = false;
390 		bool usedDown = false;
391 		if (upDist < downDist || (tie && !_lastTie)) {
392 			totalHitsUsed += addRecsToRetList(upRecs->allElems(upIter), 0 - upDist, retList);
393 			upIter++;
394 			usedUp = true;
395 		}
396 		if (downDist < upDist || (tie && !_firstTie)) {
397 			totalHitsUsed += addRecsToRetList(downRecs->allElems(downIter), downDist, retList);
398 			downIter++;
399 			usedDown = true;
400 		}
401 		if (tie) {
402 			// If there was a tie, but we didn't use both elements because of the tie mode,
403 			// then we still have to increment the iterator of the unused element so it
404 			// isn't used later.
405 			if (usedUp && !usedDown) {
406 				downIter++;
407 			} else if (usedDown && !usedUp) {
408 				upIter++;
409 			}
410 		}
411 	}
412 
413 }
414 
415 
416 
addRecsToRetList(RecDistList::elemsType * recs,CHRPOS currDist,RecordKeyVector & retList)417 int CloseSweep::addRecsToRetList(RecDistList::elemsType *recs, CHRPOS currDist, RecordKeyVector &retList) {
418 
419 	int hitsUsed = 0;
420 	int numRecs = (int)recs->size(); //just to clean the code some.
421 
422 
423 	if (_firstTie) {
424 		addSingleRec(recs->at(0).second, currDist, hitsUsed, retList);
425 		return 1;
426 
427 	} else if (_lastTie) {
428 		addSingleRec(recs->at(numRecs-1).second, currDist, hitsUsed, retList);
429 		return 1;
430 
431 	} else { //tieMode == ALL_TIES
432 
433 		for (int i=0; i < numRecs; i++) {
434 			addSingleRec(recs->at(i).second, currDist, hitsUsed, retList);
435 		}
436 		return numRecs;
437 	}
438 }
439 
addSingleRec(Record * rec,CHRPOS currDist,int & hitsUsed,RecordKeyVector & retList)440 void CloseSweep::addSingleRec(Record *rec, CHRPOS currDist, int &hitsUsed, RecordKeyVector &retList) {
441 	retList.push_back(rec);
442 	_finalDistances.push_back(currDist);
443 	hitsUsed++;
444 }
445 
checkMultiDbs(RecordKeyVector & retList)446 void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
447 //	//can skip this method if there's only one DB, or if we are
448 //	//resolving closest hits for each db instead of all of them
449 	if (_context->getMultiDbMode() != ContextClosest::ALL_DBS ||  _numDBs == 1) return;
450 
451 
452 	// Get the K closest hits among multiple databases,
453 	// while not counting ties more than once if the tieMode
454 	// is "first" or "last".
455 	// Start by entering  all hits and their absolute distances
456 	// into a vector of distance tuples, then sort it.
457 
458 	vector<distanceTuple> copyDists;
459 	int numHits = (int)retList.size();
460 	copyDists.resize(numHits);
461 	CHRPOS i=0;
462 	for (RecordKeyVector::iterator_type iter = retList.begin(); iter != retList.end(); iter++) {
463 		CHRPOS dist = _finalDistances[i];
464 		copyDists[i]._dist = abs(dist);
465 		copyDists[i]._rec = *iter;
466 		copyDists[i]._isNeg = dist < 0;
467 		i++;
468 	}
469 
470 	// sort the hits by distance
471 	sort(copyDists.begin(), copyDists.end(), DistanceTupleSortAscFunctor());
472 
473 	//now we want to build a map telling us what distances are tied,
474 	//and how many of each of these there are. Use a map<int, int>,
475 	//where the key is a distance (in absolute value) and the value
476 	//is the number of ties that that distance has.
477 	map<CHRPOS, CHRPOS> ties;
478 	for (vector<distanceTuple>::iterator i = copyDists.begin(); i != copyDists.end(); ++i)
479     	++ties[i->_dist];
480 
481 	// Clear the original list and distances, and re-populate
482 	// until we have the desired number of hits, skipping
483 	// over any unwanted ties.
484 	retList.clearVector();
485 	_finalDistances.clear();
486 
487 	int hitsUsed = 0;
488 	for (i=0; i < numHits && hitsUsed < _kClosest; i++) {
489 		CHRPOS dist = copyDists[i]._dist;
490 		bool isNeg = copyDists[i]._isNeg;
491 		//see if this distance is tied with any other
492 		map<CHRPOS, CHRPOS>::iterator iter = ties.find(dist);
493 		if (iter != ties.end()) {
494 			//tie was found
495 			CHRPOS numTies = iter->second;
496 			if (!_allTies) {
497 				if (_firstTie) {
498 					//just add the first of the ties
499 					addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
500 					i += numTies - 1; // use first, then skip ahead by the number of ties, minus 1 because
501 					//loop is about to be incremented
502 				} else { //tieMode == LAST_TIE. Just add the last of the ties.
503 					i += numTies -1;
504 					dist = copyDists[i]._dist;
505 					isNeg = copyDists[i]._isNeg;
506 					addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
507 				}
508 			} else {
509 				// tieMode is ALL_TIES, use all hits.
510 				for (int j = i; j < i + numTies; j++) {
511 					dist = copyDists[j]._dist;
512 					isNeg = copyDists[j]._isNeg;
513 					addSingleRec(copyDists[j]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
514 				}
515 				i += numTies - 1; //skip ahead by the number of ties, minus 1 because
516 				//loop is about to be incremented
517 			}
518 		} else {
519 			addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
520 		}
521 	}
522 }
523 
chromChange(int dbIdx,RecordKeyVector & retList,bool wantScan)524 bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
525 {
526 	Record *dbRec = _currDbRecs[dbIdx];
527 
528 	bool haveQuery = _currQueryRec != NULL;
529 	bool haveDB = dbRec != NULL;
530 
531 	if (haveQuery && _currQueryChromName != _prevQueryChromName) {
532 		_context->testNameConventions(_currQueryRec);
533 		testChromOrder(_currQueryRec);
534 	}
535 
536 	if (haveDB) {
537 		_context->testNameConventions(dbRec);
538 		testChromOrder(dbRec);
539 	}
540 
541     // the files are on the same chrom
542 	if (haveQuery && (!haveDB || _currQueryRec->sameChrom(dbRec))) {
543 
544 		//if this is the first time the query's chrom is ahead of the chrom that was in this cache,
545 		//then we have to clear the cache.
546 		if (!_caches[dbIdx].empty() && queryChromAfterDbRec(_caches[dbIdx].begin()->value())) {
547 			clearCache(dbIdx);
548 			clearClosestEndPos(dbIdx);
549 		}
550 		return false;
551 	}
552 
553 	if (!haveQuery || !haveDB) return false;
554 
555 	if (!_caches[dbIdx].empty() && (_caches[dbIdx].begin()->value()->sameChrom(_currQueryRec))) {
556 		//the newest DB record's chrom is ahead of the query, but the cache still
557 		//has old records on that query's chrom
558 		scanCache(dbIdx, retList);
559 		finalizeSelections(dbIdx, retList);
560 		return true;
561 	}
562 
563 
564 	// the query is ahead of the database. fast-forward the database to catch-up.
565 	if (queryChromAfterDbRec(dbRec)) {
566 		string oldDbChrom(dbRec->getChrName());
567 
568 		while (dbRec != NULL &&
569 				queryChromAfterDbRec(dbRec)) {
570 			_dbFRMs[dbIdx]->deleteRecord(dbRec);
571 			if (!nextRecord(false, dbIdx)) break;
572 			dbRec =  _currDbRecs[dbIdx];
573 			const string &newDbChrom = dbRec->getChrName();
574 			if (newDbChrom != oldDbChrom) {
575 				testChromOrder(dbRec);
576 				oldDbChrom = newDbChrom;
577 			}
578 		}
579 		clearCache(dbIdx);
580 		clearClosestEndPos(dbIdx);
581         return false;
582     }
583     // the database is ahead of the query.
584     else {
585         // 1. scan the cache for remaining hits on the query's current chrom.
586 		if (wantScan) scanCache(dbIdx, retList);
587 
588         return true;
589     }
590 
591 	//control can't reach here, but compiler still wants a return statement.
592 	return true;
593 }
594 
595 
setLeftClosestEndPos(int dbIdx)596 void CloseSweep::setLeftClosestEndPos(int dbIdx)
597 {
598 
599   //try to determine max end pos of hits to left of query.
600   //first check for exceptions due to options that cause
601   //complete rejection of all left side hits.
602 
603   //no records found to left of query,
604   //can't set purge point, except for some special cases.
605   purgeDirectionType purgeDir = purgePointException();
606   if (purgeDir == BOTH || purgeDir == FORWARD_ONLY) {
607     _maxPrevLeftClosestEndPos[dbIdx] = max(_currQueryRec->getStartPos(), _maxPrevLeftClosestEndPos[dbIdx]);
608   }
609   if (purgeDir == BOTH || purgeDir == REVERSE_ONLY) {
610     _maxPrevLeftClosestEndPosReverse[dbIdx] = max(_currQueryRec->getStartPos(), _maxPrevLeftClosestEndPosReverse[dbIdx]);
611   }
612   if (purgeDir != NEITHER) return;
613 
614   RecDistList *upRecs = _minUpstreamRecs[dbIdx];
615   RecDistList *downRecs = _minDownstreamRecs[dbIdx];
616 
617   CHRPOS upDist = upRecs->getMaxLeftEndPos();
618   CHRPOS downDist = downRecs->getMaxLeftEndPos();
619 
620 
621   if (upDist == -1 && downDist == -1) return;
622 
623 	CHRPOS leftMostEndPos = (_currQueryRec->getStartPos() - max(upDist, downDist)) +1;
624 	if ((!_sameStrand && !_diffStrand) ||
625 		(_sameStrand && _qForward) ||
626 		(_diffStrand && _qReverse))  {
627 
628 		_maxPrevLeftClosestEndPos[dbIdx] = max(leftMostEndPos, _maxPrevLeftClosestEndPos[dbIdx]);
629 	} else {
630 		_maxPrevLeftClosestEndPosReverse[dbIdx] = max(leftMostEndPos, _maxPrevLeftClosestEndPosReverse[dbIdx]);
631 	}
632 }
633 
beforeLeftClosestEndPos(int dbIdx,Record * rec)634 bool CloseSweep::beforeLeftClosestEndPos(int dbIdx, Record *rec)
635 {
636 	CHRPOS recEndPos = rec->getEndPos();
637 	CHRPOS prevPos = _maxPrevLeftClosestEndPos[dbIdx];
638 	CHRPOS prevPosReverse = _maxPrevLeftClosestEndPosReverse[dbIdx];
639 
640 	if (!_sameStrand && !_diffStrand) {
641 		return recEndPos < prevPos;
642 	} else {
643 		if (rec->getStrandVal() == Record::FORWARD) {
644 			return recEndPos < prevPos;
645 		} else {
646 			return recEndPos < prevPosReverse;
647 		}
648   }
649 	return false;
650 }
651 
clearClosestEndPos(int dbIdx)652 void CloseSweep::clearClosestEndPos(int dbIdx)
653 {
654 	_maxPrevLeftClosestEndPos[dbIdx] = 0;
655 	_maxPrevLeftClosestEndPosReverse[dbIdx] = 0;
656 }
657 
tryToAddRecord(Record * cacheRec,CHRPOS dist,int dbIdx,bool & stopScanning,chromDirType chromDir,streamDirType streamDir)658 CloseSweep::rateOvlpType CloseSweep::tryToAddRecord(Record *cacheRec, CHRPOS dist, int dbIdx, bool &stopScanning, chromDirType chromDir, streamDirType streamDir) {
659 
660 	//
661 	// Decide whether to ignore hit
662 	//
663 	// If want same strand, and they're unknown or not the same, ignore
664 	// If we want diff strand, and they're unknown or different, ignore.
665 	// If we want diff names and they're the same, ignore
666 	// If stream is unwanted, ignore.
667 	bool hasUnknownStrands = (_currQueryRec->getStrandVal() == Record::UNKNOWN || cacheRec->getStrandVal() == Record::UNKNOWN);
668 	bool wantedSameNotSame = (_sameStrand && (hasUnknownStrands || (_currQueryRec->getStrandVal() != cacheRec->getStrandVal())));
669 	bool wantedDiffNotDiff = (_diffStrand && (hasUnknownStrands || (_currQueryRec->getStrandVal() == cacheRec->getStrandVal())));
670 	bool badStrand = wantedSameNotSame || wantedDiffNotDiff;
671 	bool badNames = (_context->diffNames() && (cacheRec->getName() == _currQueryRec->getName()));
672 	bool badStream = (streamDir == UPSTREAM ? _ignoreUpstream : (streamDir == DOWNSTREAM ? _ignoreDownstream :  _context->ignoreOverlaps()));
673 
674 	bool shouldIgnore = badStrand || badNames || badStream;
675 
676 
677 	// You would think ignoring it means we could stop here, but even then,
678 	// hits on the left may need to be purged from the cache,
679 	// and hits on the right tell us when to stop scanning the cache.
680 
681 
682 	//establish which set of hits we are looking at.
683 	RecDistList *useList = (streamDir == UPSTREAM ? _minUpstreamRecs[dbIdx] : (streamDir == INTERSECT ? _overlapRecs[dbIdx] : _minDownstreamRecs[dbIdx]));
684 
685 
686 	if (chromDir == OVERLAP && !shouldIgnore) {
687 		useList->addRec(0, cacheRec, RecDistList::OVERLAP);
688 	}
689 
690 	else if (chromDir == LEFT) {
691 		if (beforeLeftClosestEndPos(dbIdx, cacheRec)) {
692 			return DELETE;
693 		}
694 		if (!shouldIgnore) {
695 			useList->addRec(dist, cacheRec, RecDistList::LEFT);
696 		}
697 	}
698 
699 	else if (chromDir == RIGHT) {
700 		//hit is to the right of query. Need to know when to stop scanning.
701 		// if NOT ignoring:
702 		//		if we're able to add it to the useList, definitely DON'T stop,
703 		//		hit was valid, so next one could be too.
704 		//		if we're UNABLE to add it to the uselist, definitely DO stop,
705 		//		because useList is full.
706 		// but if we ARE ignoring:
707 		//      can only stop in the rare cases where hits to the right
708 		//		are ALWAYS getting ignored, regardless of query and cache strand
709 		//      Otherwise, always continue.
710 		if (!shouldIgnore) {
711 			if (!useList->addRec(dist, cacheRec, RecDistList::RIGHT)) {
712 				stopScanning = true;
713 			}
714 		} else { // hit is ignored
715 			if (allHitsRightOfQueryIgnored()) {
716 				stopScanning = true;
717 			}
718 		}
719 	}
720 	return IGNORE;
721 }
722 
allHitsRightOfQueryIgnored()723 bool CloseSweep::allHitsRightOfQueryIgnored() {
724 	return ((_refDist && _ignoreDownstream) ||
725 			(_aDist && ((_ignoreUpstream && _qReverse) || (_ignoreDownstream && _qForward))) ||
726 			(_bDist && ((_ignoreDownstream && ((_qForward && _diffStrand) || (_qReverse && _sameStrand))) ||
727 					((_ignoreUpstream && ((_qReverse && _diffStrand) || (_qForward && _sameStrand)))))));
728 }
729 
730 
purgePointException()731 CloseSweep::purgeDirectionType CloseSweep::purgePointException() {
732 
733 	// Normally, we can't set a cache purge point if there are no
734 	// records to the left of the query.
735 
736 	// This method will detect use cases that cause all of the left side
737 	// hits to have been rejected, and tell us whether to purge the forward
738 	// cache, reverse cache, both, or neither.
739 
740 	purgeDirectionType purgeDir = NEITHER;
741 
742 	if (_ignoreUpstream && _ignoreDownstream) {
743 		purgeDir = BOTH;
744 	}
745 	else if (_ignoreUpstream) {
746 		if (_refDist) {
747 			purgeDir = BOTH;
748 		}
749 		else if (_aDist && _qForward) {
750 			if (_sameStrand) {
751 				purgeDir = FORWARD_ONLY;
752 			}
753 			else if (_diffStrand) {
754 				purgeDir = REVERSE_ONLY;
755 			}
756 		}
757 		else if (_bDist) {
758 			  if (_qForward && _diffStrand) purgeDir = REVERSE_ONLY;
759 			  else if (_qReverse && _sameStrand) purgeDir = REVERSE_ONLY;
760 		}
761 	}
762 	else if (_ignoreDownstream) {
763 		 // if refDist, do nothing. left hits can't be downstream.
764 		 if (_aDist) {
765 			//if qForward, do nothing. left hits can't be downstream.
766 			if (_qReverse) {
767 				if (_sameStrand) purgeDir = REVERSE_ONLY;
768 				else if (_diffStrand) purgeDir = FORWARD_ONLY;
769 			}
770 		} else if (_bDist) {
771 			if (_qForward && _sameStrand) purgeDir = FORWARD_ONLY;
772 			else if (_qReverse && _diffStrand) purgeDir = FORWARD_ONLY;
773 		}
774 	}
775 
776 	return purgeDir;
777 
778 }
779