1 /* 2 * CloseSweep.h 3 * 4 * Created on: Sep 25, 2014 5 * Author: nek3d 6 */ 7 8 #ifndef CLOSESWEEP_H_ 9 #define CLOSESWEEP_H_ 10 11 #include "NewChromsweep.h" 12 #include <list> 13 #include <set> 14 15 #include "ContextClosest.h" 16 17 class distanceTuple { 18 public: distanceTuple()19 distanceTuple() : _dist(0), _rec(NULL), _isNeg(false) {} _dist(dist)20 distanceTuple(int dist, Record *rec, bool isNeg = false) : _dist(dist), _rec(rec), _isNeg(isNeg) {} 21 int _dist; 22 Record *_rec; 23 bool _isNeg; 24 }; 25 26 class DistanceTupleSortAscFunctor { 27 public: operator()28 bool operator()(const distanceTuple & d1, const distanceTuple & d2) const { 29 return (d1._dist < d2._dist ? true : (d1._dist == d2._dist ? d1._rec->lessThan(d2._rec) : false)); 30 } 31 }; 32 33 34 class RecDistList { 35 public: 36 typedef enum { LEFT, OVERLAP, RIGHT } chromDirType; 37 RecDistList(int maxSize); 38 ~RecDistList(); empty()39 bool empty() const { return _empty; } 40 void clear(); uniqueSize()41 int uniqueSize() const { return _currNumIdxs; } totalSize()42 size_t totalSize() const { return _totalRecs; } 43 bool addRec(CHRPOS dist, Record *, chromDirType chromDir); exists(CHRPOS dist)44 bool exists(CHRPOS dist) const { 45 CHRPOS dummyVal = 0; 46 return find(dist, dummyVal); 47 } 48 typedef pair<chromDirType, Record *> elemPairType; 49 typedef vector<elemPairType> elemsType; 50 typedef pair<int, int> indexType; 51 getMaxDist()52 int getMaxDist() const { return _empty ? 0 : _distIndex[_currNumIdxs-1].first; } 53 typedef int constIterType; //used to be a map iter, trying not to change interface too much. begin()54 constIterType begin() const { return 0; } end()55 constIterType end() const { return _currNumIdxs; } currDist(constIterType iter)56 int currDist(constIterType iter) const { return _distIndex[iter].first; } currNumElems(constIterType iter)57 size_t currNumElems(constIterType iter) const { return allElems(iter)->size(); } allElems(constIterType iter)58 elemsType *allElems(constIterType iter) const { return _allRecs[_distIndex[iter].second]; } 59 CHRPOS getMaxLeftEndPos() const; 60 61 private: 62 63 void insert(CHRPOS dist, Record *, chromDirType chromDir); 64 65 66 //if true, pos will be the idx the distance is at. 67 //if false, pos will be the idx to insert at. 68 bool find(CHRPOS dist, CHRPOS &pos) const; 69 70 71 int _kVal; //max unique allowed 72 bool _empty; 73 int _currNumIdxs; 74 int _totalRecs; 75 76 vector<elemsType *> _allRecs; 77 indexType * _distIndex; 78 }; 79 80 class CloseSweep : public NewChromSweep { 81 public: 82 CloseSweep(ContextClosest *context); 83 ~CloseSweep(void); 84 bool init(); getDistances()85 const vector<CHRPOS> &getDistances() { return _finalDistances; } 86 87 private: 88 ContextClosest *_context; 89 int _kClosest; // how many closest hits we want to each query. 90 vector<RecDistList *> _minUpstreamRecs; 91 vector<RecDistList *> _minDownstreamRecs; 92 vector<RecDistList *> _overlapRecs; 93 vector<CHRPOS> _maxPrevLeftClosestEndPos; 94 vector<CHRPOS> _maxPrevLeftClosestEndPosReverse; 95 96 vector<CHRPOS> _finalDistances; 97 98 99 // 100 // Some abbreviations to make the code less miserable. 101 // 102 bool _sameStrand; 103 bool _diffStrand; 104 105 bool _refDist; 106 bool _aDist; 107 bool _bDist; 108 109 bool _ignoreUpstream; 110 bool _ignoreDownstream; 111 112 bool _qForward; 113 bool _qReverse; 114 bool _dbForward; 115 bool _dbReverse; 116 117 ContextClosest::tieModeType _tieMode; 118 bool _firstTie; 119 bool _lastTie; 120 bool _allTies; 121 122 bool allHitsRightOfQueryIgnored(); //true if, no matter what the strands 123 // of the hit and query are, we'd ignore the hit so long as it's on the right 124 // of the query. Set only during initilization, this is strictly a function 125 // of the user provided arguments. Ex: -D ref -id 126 127 128 129 //structs to help with finding closest among all of multiple dbs. 130 RecordKeyVector _copyRetList; 131 vector<CHRPOS> _copyDists; 132 133 //override these methods from chromsweep 134 void masterScan(RecordKeyVector &retList); 135 void scanCache(int dbIdx, RecordKeyVector &retList); 136 bool chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan); 137 138 139 typedef enum { IGNORE, DELETE } rateOvlpType; 140 rateOvlpType considerRecord(Record *cacheRec, int dbIdx, bool &stopScanning); 141 void finalizeSelections(int dbIdx, RecordKeyVector &retList); 142 void checkMultiDbs(RecordKeyVector &retList); 143 144 typedef enum { LEFT, OVERLAP, RIGHT } chromDirType; 145 typedef enum { UPSTREAM, INTERSECT, DOWNSTREAM } streamDirType; 146 typedef enum { NEITHER, FORWARD_ONLY, REVERSE_ONLY, BOTH } purgeDirectionType; 147 148 void setLeftClosestEndPos(int dbIdx); 149 bool beforeLeftClosestEndPos(int dbIdx, Record *rec); 150 void clearClosestEndPos(int dbIdx); 151 int addRecsToRetList(RecDistList::elemsType *recs, CHRPOS currDist, RecordKeyVector &retList); 152 void addSingleRec(Record *rec, CHRPOS currDist, int &hitsUsed, RecordKeyVector &retList); 153 rateOvlpType tryToAddRecord(Record *cacheRec, CHRPOS dist, int dbIdx, bool &stopScanning, chromDirType chromDir, streamDirType streamDir); 154 purgeDirectionType purgePointException(); 155 156 }; 157 158 159 #endif /* CLOSESWEEP_H_ */ 160