1 /* 2 * Record.h 3 * 4 * Created on: Nov 8, 2012 5 * Author: nek3d 6 */ 7 8 #ifndef RECORD_H_ 9 #define RECORD_H_ 10 11 #include <string> 12 #include "BedtoolsTypes.h" 13 #include "FreeList.h" 14 #include "string.h" 15 #include "FileRecordTypeChecker.h" 16 17 18 using namespace std; 19 20 class FileRecordMgr; 21 class FileReader; 22 class ChromIdLookup; 23 24 class Record { 25 public: 26 friend class RecordMgr; 27 friend class RecordOutputMgr; 28 29 friend class FreeList<Record>; 30 31 virtual ~Record(); //by making the destructor protected, only the friend class(es) can actually delete Record objects, or objects derived from Record. 32 33 typedef enum { FORWARD, REVERSE, UNKNOWN } strandType; 34 Record(); 35 virtual bool initFromFile(FileReader *) =0; 36 virtual void clear(); print(string &)37 virtual void print(string &) const {} print(string &,CHRPOS,CHRPOS)38 virtual void print(string &, CHRPOS, CHRPOS ) const {} print(string &,const string &,const string &)39 virtual void print(string &, const string &, const string &) const {} 40 virtual void print(FILE *fp, bool newline = false) const; printNull(string &)41 virtual void printNull(string &) const {} 42 friend ostream &operator << (ostream &out, const Record &record); 43 44 virtual const Record & operator=(const Record &); 45 isZeroBased()46 virtual bool isZeroBased() const {return true;}; 47 setValid(const bool valid)48 virtual void setValid(const bool valid) { _isValidHit = valid; } isValid()49 virtual bool isValid() const { return _isValidHit; } 50 getChrName()51 virtual const string &getChrName() const { return _chrName; } setChrName(const string & chr)52 virtual void setChrName(const string &chr) { _chrName = chr; } setChrName(const char * chr)53 virtual void setChrName(const char *chr) { _chrName = chr; } 54 getFileIdx()55 virtual int getFileIdx() const { return _fileIdx; } setFileIdx(int fileIdx)56 virtual void setFileIdx(int fileIdx) { _fileIdx = fileIdx; } 57 getChromId()58 virtual int getChromId() const { return _chrId; } setChromId(int id)59 virtual void setChromId(int id) { _chrId = id; } 60 getStartPos()61 virtual CHRPOS getStartPos() const { return _startPos; } setStartPos(CHRPOS startPos)62 virtual void setStartPos(CHRPOS startPos) { _startPos = startPos; } getStartPosStr()63 virtual const string &getStartPosStr() const { return _startPosStr; } setStartPosStr(const string & str)64 virtual void setStartPosStr(const string &str) { _startPosStr = str; } 65 getEndPos()66 virtual CHRPOS getEndPos() const { return _endPos; } setEndPos(CHRPOS endPos)67 virtual void setEndPos(CHRPOS endPos) { _endPos = endPos; } getEndPosStr()68 virtual const string &getEndPosStr() const { return _endPosStr; } setEndPosStr(const string & str)69 virtual void setEndPosStr(const string &str) { _endPosStr = str; } 70 getZeroLength()71 virtual bool getZeroLength() const { return _zeroLength; } setZeroLength(bool val)72 virtual void setZeroLength(bool val) { _zeroLength = val; } 73 getStrand()74 virtual const string &getStrand() const { return _strand; } setStrand(const string & val)75 virtual void setStrand(const string &val) { 76 _strand = val; 77 _strandVal = (val == "+" ? FORWARD : (val == "-" ? REVERSE : UNKNOWN)); 78 } setStrand(char val)79 virtual void setStrand(char val) { _strand = val; 80 _strandVal = (val == '+' ? FORWARD : (val == '-' ? REVERSE : UNKNOWN)); 81 } adjustStrandVal()82 virtual void adjustStrandVal() { 83 _strandVal = (_strand == "+" ? FORWARD : (_strand == "-" ? REVERSE : UNKNOWN)); 84 } 85 getStrandVal()86 virtual strandType getStrandVal() const {return _strandVal; } 87 getName()88 virtual const string &getName() const { return _name; } setName(const string & name)89 virtual void setName(const string &name) { _name = name; } setName(const char * chr)90 virtual void setName(const char *chr) { _name = chr; } 91 getScore()92 virtual const string &getScore() const { return _score; } setScore(const string & score)93 virtual void setScore(const string &score) { _score = score; } setScore(const char * chr)94 virtual void setScore(const char *chr) { _score = chr; } 95 96 virtual const string &getField(int fieldNum) const; 97 virtual int getNumFields() const = 0; 98 getType()99 virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::UNKNOWN_RECORD_TYPE; } 100 101 virtual bool coordsValid(); //test that no coords negative, end not less than start, check zeroLength (see below). 102 103 //Some files can have insertions of the form 2,2. If found this should translate to cover the base before and after, 104 //thus meaning the startPos is decremented and the endPos is incremented. This method will find and handle that case. 105 //Don't adjust the startPosStr and endPosStr strings because they aren't used in 106 //calculation. They're only used in output, and it would be slower to change them 107 //and then change them back. 108 virtual void adjustZeroLength(); 109 virtual void undoZeroLength(); //change it back just before output; isZeroLength()110 virtual bool isZeroLength() const { return _zeroLength; } 111 112 // "Unmapped" only applies to BamRecord, but for design reasons, it has to be here, 113 // because we want to short circuit the intersects method if either record is an unmapped 114 // Bam record. isUnmapped()115 bool isUnmapped() const { return _isUnmapped; } isMateUnmapped()116 bool isMateUnmapped() const { return _isMateUnmapped; } printUnmapped(string &)117 virtual void printUnmapped(string &) const {} 118 119 120 121 virtual bool operator < (const Record &other) const; 122 virtual bool operator > (const Record &other) const; 123 virtual bool lessThan(const Record *other) const; 124 virtual bool greaterThan(const Record *other) const; 125 126 //is this on the same chromosome as another record? 127 bool sameChrom(const Record *other) const; 128 bool chromBefore(const Record *other) const; 129 bool chromAfter(const Record *other) const; 130 131 //is this record after the other one? 132 virtual bool after(const Record *other) const; 133 134 //does this record intersect with another record? 135 virtual bool intersects(const Record *otherRecord, 136 bool sameStrand, 137 bool diffStrand, 138 float overlapFractionA, 139 float overlapFractionB, 140 bool reciprocalFraction, 141 bool eitherFraction, 142 bool obeySplits) const; 143 144 // *** WARNING !!! ** sameChromIntersects is a faster version of the intersects method, 145 // BUT the caller MUST ensure that the records are on the same 146 //chromosome. If you're not absolutely sure, use the regular intersects method. 147 virtual bool sameChromIntersects(const Record *otherRecord, 148 bool sameStrand, 149 bool diffStrand, 150 float overlapFractionA, 151 float overlapFractionB, 152 bool reciprocalFraction, 153 bool eitherFraction, 154 bool obeySplits) const; 155 156 // virtual static bool isNumericField(int fieldNum) const = 0; 157 158 bool hasChrInChromName() const; 159 bool hasLeadingZeroInChromName(bool chrKnown = false) const; 160 virtual CHRPOS getLength(bool obeySplits) const; 161 162 void setFileRecordManager(FileRecordMgr *frm); 163 FileRecordMgr * getFileRecordManager(); 164 165 vector<int> block_starts; 166 vector<int> block_ends; 167 168 protected: 169 170 int _fileIdx; //associated file the record came from 171 string _chrName; 172 int _chrId; 173 CHRPOS _startPos; 174 CHRPOS _endPos; 175 //It is actually faster to also store the start and end positions as their original strings than to 176 //have to convert their integer representations back to strings when printing them. 177 string _startPosStr; 178 string _endPosStr; 179 string _name; 180 string _score; 181 string _strand; 182 strandType _strandVal; 183 bool _zeroLength; 184 bool _isUnmapped; 185 bool _isMateUnmapped; 186 bool _isValidHit; 187 FileRecordMgr *_frm; 188 }; 189 190 class RecordPtrSortAscFunctor { 191 public: operator()192 bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 < *rec2; } 193 }; 194 195 class RecordPtrSortDescFunctor { 196 public: operator()197 bool operator()(const Record *rec1, const Record *rec2) const { return *rec1 > *rec2; } 198 }; 199 #endif /* RECORD_H_ */ 200