/*
 * Record.h
 *
 *  Created on: Nov 8, 2012
 *      Author: nek3d
 */
7 
8 #ifndef RECORD_H_
9 #define RECORD_H_
10 
#include <cstdio>
#include <ostream>
#include <string>
#include <vector>
#include "BedtoolsTypes.h"
#include "FreeList.h"
#include "string.h"
#include "FileRecordTypeChecker.h"
16 
17 
// NOTE(review): a using-directive at header scope exposes every std name to
// each file that includes this header. It is kept as-is because this header
// spells string, vector and ostream unqualified, and including files may do
// the same; removing it would need a coordinated change.
using namespace std;

// Forward declarations. In this header FileRecordMgr and FileReader are used
// only through pointers, so their full definitions are not required here.
// ChromIdLookup is declared but not referenced in the visible part of the file.
class FileRecordMgr;
class FileReader;
class ChromIdLookup;
23 
24 class Record {
25 public:
26 	friend class RecordMgr;
27 	friend class RecordOutputMgr;
28 
29 	friend class FreeList<Record>;
30 
31 	virtual ~Record(); //by making the destructor protected, only the friend class(es) can actually delete Record objects, or objects derived from Record.
32 
33 	typedef enum { FORWARD, REVERSE, UNKNOWN } strandType;
34 	Record();
35 	virtual bool initFromFile(FileReader *) =0;
36 	virtual void clear();
print(string &)37 	virtual void print(string &) const {}
print(string &,CHRPOS,CHRPOS)38 	virtual void print(string &, CHRPOS, CHRPOS ) const {}
print(string &,const string &,const string &)39 	virtual void print(string &, const string &, const string &) const {}
40 	virtual void print(FILE *fp, bool newline = false) const;
printNull(string &)41 	virtual void printNull(string &) const {}
42 	friend ostream &operator << (ostream &out, const Record &record);
43 
44 	virtual const Record & operator=(const Record &);
45 
isZeroBased()46 	virtual bool isZeroBased() const {return true;};
47 
setValid(const bool valid)48 	virtual void setValid(const bool valid)  { _isValidHit = valid; }
isValid()49 	virtual bool isValid() const { return _isValidHit; }
50 
getChrName()51 	virtual const string &getChrName() const { return _chrName; }
setChrName(const string & chr)52 	virtual void setChrName(const string &chr) { _chrName = chr; }
setChrName(const char * chr)53 	virtual void setChrName(const char *chr) { _chrName = chr; }
54 
getFileIdx()55 	virtual int getFileIdx() const { return _fileIdx; }
setFileIdx(int fileIdx)56 	virtual void setFileIdx(int fileIdx) { _fileIdx = fileIdx; }
57 
getChromId()58 	virtual int getChromId() const { return _chrId; }
setChromId(int id)59 	virtual void setChromId(int id) { _chrId = id; }
60 
getStartPos()61 	virtual CHRPOS getStartPos() const { return _startPos; }
setStartPos(CHRPOS startPos)62 	virtual void setStartPos(CHRPOS startPos) { _startPos = startPos; }
getStartPosStr()63 	virtual const string &getStartPosStr() const { return _startPosStr; }
setStartPosStr(const string & str)64 	virtual void setStartPosStr(const string &str) { _startPosStr = str; }
65 
getEndPos()66 	virtual CHRPOS getEndPos() const { return _endPos; }
setEndPos(CHRPOS endPos)67 	virtual void setEndPos(CHRPOS endPos) { _endPos = endPos; }
getEndPosStr()68 	virtual const string &getEndPosStr() const { return _endPosStr; }
setEndPosStr(const string & str)69 	virtual void setEndPosStr(const string &str) { _endPosStr = str; }
70 
getZeroLength()71 	virtual bool getZeroLength() const { return _zeroLength; }
setZeroLength(bool val)72 	virtual void setZeroLength(bool val) { _zeroLength = val; }
73 
getStrand()74 	virtual const string &getStrand() const { return _strand; }
setStrand(const string & val)75 	virtual void setStrand(const string &val) {
76 		_strand = val;
77 		_strandVal = (val == "+" ? FORWARD : (val == "-" ? REVERSE : UNKNOWN));
78 	}
setStrand(char val)79 	virtual void setStrand(char val) { _strand = val;
80 		_strandVal = (val == '+' ? FORWARD : (val == '-' ? REVERSE : UNKNOWN));
81 	}
adjustStrandVal()82 	virtual void adjustStrandVal() {
83 		_strandVal = (_strand == "+" ? FORWARD : (_strand == "-" ? REVERSE : UNKNOWN));
84 	}
85 
getStrandVal()86 	virtual strandType getStrandVal() const {return _strandVal; }
87 
getName()88 	virtual const string &getName() const { return _name; }
setName(const string & name)89 	virtual void setName(const string &name) { _name = name; }
setName(const char * chr)90 	virtual void setName(const char *chr) { _name = chr; }
91 
getScore()92 	virtual const string &getScore() const { return _score; }
setScore(const string & score)93 	virtual void setScore(const string &score) { _score = score; }
setScore(const char * chr)94 	virtual void setScore(const char *chr) { _score = chr; }
95 
96 	virtual const string &getField(int fieldNum) const;
97 	virtual int getNumFields() const  = 0;
98 
getType()99 	virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::UNKNOWN_RECORD_TYPE; }
100 
101 	virtual bool coordsValid(); //test that no coords negative, end not less than start, check zeroLength (see below).
102 
103 	//Some files can have insertions of the form 2,2. If found this should translate to cover the base before and after,
104 	//thus meaning the startPos is decremented and the endPos is incremented. This method will find and handle that case.
105 	//Don't adjust the startPosStr and endPosStr strings because they aren't used in
106 	//calculation. They're only used in output, and it would be slower to change them
107 	//and then change them back.
108 	virtual void adjustZeroLength();
109 	virtual void undoZeroLength(); //change it back just before output;
isZeroLength()110 	virtual bool isZeroLength() const { return _zeroLength; }
111 
112 	// "Unmapped" only applies to BamRecord, but for design reasons, it has to be here,
113 	// because we want to short circuit the intersects method if either record is an unmapped
114 	// Bam record.
isUnmapped()115 	bool isUnmapped() const { return _isUnmapped; }
isMateUnmapped()116 	bool isMateUnmapped() const { return _isMateUnmapped; }
printUnmapped(string &)117 	virtual void printUnmapped(string &) const {}
118 
119 
120 
121 	virtual bool operator < (const Record &other) const;
122 	virtual bool operator > (const Record &other) const;
123 	virtual bool lessThan(const Record *other) const;
124 	virtual bool greaterThan(const Record *other) const;
125 
126 	//is this on the same chromosome as another record?
127 	bool sameChrom(const Record *other) const;
128 	bool chromBefore(const Record *other) const;
129 	bool chromAfter(const Record *other) const;
130 
131 	//is this record after the other one?
132 	virtual bool after(const Record *other) const;
133 
134 	//does this record intersect with another record?
135 	virtual bool intersects(const Record *otherRecord,
136 							bool sameStrand,
137 							bool diffStrand,
138 							float overlapFractionA,
139 							float overlapFractionB,
140 							bool reciprocalFraction,
141 						    bool eitherFraction,
142 						    bool obeySplits) const;
143 
144 	// *** WARNING !!! ** sameChromIntersects is a faster version of the intersects method,
145 	// BUT the caller MUST ensure that the records are on the same
146 	//chromosome. If you're not absolutely sure, use the regular intersects method.
147 	virtual bool sameChromIntersects(const Record *otherRecord,
148 									 bool sameStrand,
149 									 bool diffStrand,
150 									 float overlapFractionA,
151 									 float overlapFractionB,
152 									 bool reciprocalFraction,
153 									 bool eitherFraction,
154 									 bool obeySplits) const;
155 
156 //	virtual static bool isNumericField(int fieldNum) const = 0;
157 
158 	bool hasChrInChromName() const;
159 	bool hasLeadingZeroInChromName(bool chrKnown = false) const;
160 	virtual CHRPOS getLength(bool obeySplits) const;
161 
162 	void setFileRecordManager(FileRecordMgr *frm);
163 	FileRecordMgr * getFileRecordManager();
164 
165 	vector<int> block_starts;
166 	vector<int> block_ends;
167 
168 protected:
169 
170 	int _fileIdx; //associated file the record came from
171 	string _chrName;
172 	int _chrId;
173 	CHRPOS _startPos;
174 	CHRPOS _endPos;
175 	//It is actually faster to also store the start and end positions as their original strings than to
176 	//have to convert their integer representations back to strings when printing them.
177 	string _startPosStr;
178 	string _endPosStr;
179 	string _name;
180 	string _score;
181 	string _strand;
182 	strandType _strandVal;
183 	bool _zeroLength;
184 	bool _isUnmapped;
185 	bool _isMateUnmapped;
186 	bool _isValidHit;
187 	FileRecordMgr *_frm;
188 };
189 
190 class RecordPtrSortAscFunctor {
191 public:
operator()192 	bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 < *rec2; }
193 };
194 
195 class RecordPtrSortDescFunctor {
196 public:
operator()197 	bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 > *rec2; }
198 };
199 #endif /* RECORD_H_ */
200