1 /*
2  * CloseSweep.h
3  *
4  *  Created on: Sep 25, 2014
5  *      Author: nek3d
6  */
7 
8 #ifndef CLOSESWEEP_H_
9 #define CLOSESWEEP_H_
10 
11 #include "NewChromsweep.h"
12 #include <list>
13 #include <set>
14 
15 #include "ContextClosest.h"
16 
17 class distanceTuple {
18 public:
distanceTuple()19 	distanceTuple() : _dist(0), _rec(NULL), _isNeg(false) {}
_dist(dist)20 	distanceTuple(int dist, Record *rec, bool isNeg = false) : _dist(dist), _rec(rec), _isNeg(isNeg) {}
21 	int _dist;
22 	Record *_rec;
23 	bool _isNeg;
24 };
25 
26 class DistanceTupleSortAscFunctor {
27 public:
operator()28 	bool operator()(const distanceTuple & d1, const distanceTuple & d2) const {
29 		return (d1._dist < d2._dist ? true : (d1._dist == d2._dist ? d1._rec->lessThan(d2._rec) : false));
30 	}
31 };
32 
33 
34 class RecDistList {
35 public:
36     typedef enum { LEFT, OVERLAP, RIGHT } chromDirType;
37 	RecDistList(int maxSize);
38 	~RecDistList();
empty()39 	bool empty() const { return _empty; }
40 	void clear();
uniqueSize()41 	int uniqueSize() const { return _currNumIdxs; }
totalSize()42 	size_t totalSize() const { return _totalRecs; }
43 	bool addRec(CHRPOS dist, Record *, chromDirType chromDir);
exists(CHRPOS dist)44 	bool exists(CHRPOS dist) const {
45 		CHRPOS dummyVal = 0;
46 		return find(dist, dummyVal);
47 	}
48 	typedef pair<chromDirType, Record *> elemPairType;
49 	typedef vector<elemPairType> elemsType;
50 	typedef pair<int, int> indexType;
51 
getMaxDist()52 	int getMaxDist() const { return _empty ? 0 : _distIndex[_currNumIdxs-1].first; }
53 	typedef int constIterType; //used to be a map iter, trying not to change interface too much.
begin()54 	constIterType begin() const { return 0; }
end()55 	constIterType end() const { return _currNumIdxs; }
currDist(constIterType iter)56 	int currDist(constIterType iter) const { return _distIndex[iter].first; }
currNumElems(constIterType iter)57 	size_t currNumElems(constIterType iter) const { return allElems(iter)->size(); }
allElems(constIterType iter)58 	elemsType *allElems(constIterType iter) const { return _allRecs[_distIndex[iter].second]; }
59 	CHRPOS getMaxLeftEndPos() const;
60 
61 private:
62 
63 	void insert(CHRPOS dist, Record *, chromDirType chromDir);
64 
65 
66 	//if true, pos will be the idx the distance is at.
67 	//if false, pos will be the idx to insert at.
68 	bool find(CHRPOS dist, CHRPOS &pos) const;
69 
70 
71 	int _kVal; //max unique allowed
72 	bool _empty;
73 	int _currNumIdxs;
74 	int _totalRecs;
75 
76 	vector<elemsType *> _allRecs;
77 	indexType * _distIndex;
78 };
79 
80 class CloseSweep : public NewChromSweep {
81 public:
82 	CloseSweep(ContextClosest *context);
83 	~CloseSweep(void);
84 	bool init();
getDistances()85 	const vector<CHRPOS> &getDistances() { return _finalDistances; }
86 
87 private:
88    ContextClosest *_context;
89    int _kClosest; // how many closest hits we want to each query.
90 	vector<RecDistList *> _minUpstreamRecs;
91 	vector<RecDistList *> _minDownstreamRecs;
92 	vector<RecDistList *> _overlapRecs;
93 	vector<CHRPOS> _maxPrevLeftClosestEndPos;
94 	vector<CHRPOS> _maxPrevLeftClosestEndPosReverse;
95 
96 	vector<CHRPOS> _finalDistances;
97 
98 
99 	//
100 	// Some abbreviations to make the code less miserable.
101 	//
102 	bool _sameStrand;
103 	bool _diffStrand;
104 
105 	bool _refDist;
106 	bool _aDist;
107 	bool _bDist;
108 
109 	bool _ignoreUpstream;
110 	bool _ignoreDownstream;
111 
112 	bool _qForward;
113 	bool _qReverse;
114 	bool _dbForward;
115 	bool _dbReverse;
116 
117 	ContextClosest::tieModeType _tieMode;
118 	bool _firstTie;
119 	bool _lastTie;
120 	bool _allTies;
121 
122 	bool allHitsRightOfQueryIgnored(); //true if, no matter what the strands
123 	// of the hit and query are, we'd ignore the hit so long as it's on the right
124 	// of the query. Set only during initilization, this is strictly a function
125 	// of the user provided arguments. Ex: -D ref -id
126 
127 
128 
129 	//structs to help with finding closest among all of multiple dbs.
130 	RecordKeyVector _copyRetList;
131 	vector<CHRPOS> _copyDists;
132 
133 	//override these methods from chromsweep
134 	void masterScan(RecordKeyVector &retList);
135     void scanCache(int dbIdx, RecordKeyVector &retList);
136     bool chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan);
137 
138 
139  	typedef enum { IGNORE, DELETE } rateOvlpType;
140     rateOvlpType considerRecord(Record *cacheRec, int dbIdx, bool &stopScanning);
141     void finalizeSelections(int dbIdx, RecordKeyVector &retList);
142     void checkMultiDbs(RecordKeyVector &retList);
143 
144     typedef enum { LEFT, OVERLAP, RIGHT } chromDirType;
145     typedef enum { UPSTREAM, INTERSECT, DOWNSTREAM } streamDirType;
146     typedef enum { NEITHER, FORWARD_ONLY, REVERSE_ONLY, BOTH } purgeDirectionType;
147 
148     void setLeftClosestEndPos(int dbIdx);
149     bool beforeLeftClosestEndPos(int dbIdx, Record *rec);
150     void clearClosestEndPos(int dbIdx);
151     int addRecsToRetList(RecDistList::elemsType *recs, CHRPOS currDist, RecordKeyVector &retList);
152     void addSingleRec(Record *rec, CHRPOS currDist, int &hitsUsed, RecordKeyVector &retList);
153     rateOvlpType tryToAddRecord(Record *cacheRec, CHRPOS dist, int dbIdx, bool &stopScanning, chromDirType chromDir, streamDirType streamDir);
154     purgeDirectionType purgePointException();
155 
156 };
157 
158 
159 #endif /* CLOSESWEEP_H_ */
160