1 #ifndef H_Genome 2 #define H_Genome 3 4 #include "IncludeDefine.h" 5 #include "Parameters.h" 6 #include "PackedArray.h" 7 #include "SharedMemory.h" 8 #include "Variation.h" 9 #include "SuperTranscriptome.h" 10 11 class GTF; 12 13 class Genome { 14 private: 15 key_t shmKey; 16 char *shmStart; 17 uint OpenStream(string name, ifstream & stream, uint size); 18 void HandleSharedMemoryException(const SharedMemoryException & exc, uint64 shmSize); 19 public: 20 Parameters &P; 21 ParametersGenome &pGe; 22 SharedMemory *sharedMemory; 23 24 enum {exT,exS,exE,exG,exL}; //indexes in the exonLoci array from GTF 25 26 char *G, *G1; 27 uint64 nGenome, nG1alloc; 28 PackedArray SA,SAinsert,SApass1,SApass2; 29 PackedArray SAi; 30 Variation *Var; 31 32 uint nGenomeInsert, nGenomePass1, nGenomePass2, nSAinsert, nSApass1, nSApass2; 33 34 35 //chr parameters 36 vector <uint64> chrStart, chrLength, chrLengthAll; 37 uint genomeChrBinNbases, chrBinN, *chrBin; 38 vector <string> chrName, chrNameAll; 39 map <string,uint64> chrNameIndex; 40 41 uint *genomeSAindexStart;//starts of the L-mer indices in the SAindex, 1<=L<=pGe.gSAindexNbases 42 43 uint nSA, nSAbyte, nChrReal;//genome length, SA length, # of chromosomes, vector of chromosome start loci 44 uint nGenome2, nSA2, nSAbyte2, nChrReal2; //same for the 2nd pass 45 uint nSAi; //size of the SAindex 46 unsigned char GstrandBit, SAiMarkNbit, SAiMarkAbsentBit; //SA index bit for strand information 47 uint GstrandMask, SAiMarkAbsentMask, SAiMarkAbsentMaskC, SAiMarkNmask, SAiMarkNmaskC;//maske to remove strand bit from SA index, to remove mark from SAi index 48 49 //SJ database parameters 50 uint sjdbOverhang, sjdbLength; //length of the donor/acceptor, length of the sj "chromosome" =2*pGe.sjdbOverhang+1 including spacer 51 uint sjChrStart,sjdbN; //first sj-db chr 52 uint sjGstart; //start of the sj-db genome sequence 53 uint *sjDstart,*sjAstart,*sjStr, *sjdbStart, *sjdbEnd; //sjdb loci 54 uint8 *sjdbMotif; //motifs of annotated junctions 55 uint8 *sjdbShiftLeft, *sjdbShiftRight; //shifts of junctions 56 uint8 *sjdbStrand; //junctions strand, not used yet 57 58 //sequence insert parameters 59 uint genomeInsertL; //total length of the sequence to be inserted on the fly 60 uint genomeInsertChrIndFirst; //index of the first inserted chromosome 61 62 //SuperTranscriptome genome 63 SuperTranscriptome *superTr; 64 65 Genome (Parameters &P, ParametersGenome &pGe); 66 //~Genome(); 67 68 void freeMemory(); 69 void genomeLoad(); 70 void genomeOutLoad(); 71 void chrBinFill(); 72 void chrInfoLoad(); 73 void genomeSequenceAllocate(uint64 nGenomeIn, uint64 &nG1allocOut, char*& Gout, char*& G1out); 74 void loadSJDB(string &genDir); 75 76 void insertSequences(); 77 78 //void consensusSequence(); DEPRECATED 79 80 void genomeGenerate(); 81 void writeChrInfo(const string dirOut); 82 void concatenateChromosomes(const vector<vector<uint8>> &vecSeq, const vector<string> &vecName, const uint64 padBin); 83 void writeGenomeSequence(const string dirOut); 84 85 //transform genome coordinates 86 struct { 87 bool convYes; 88 bool gapsAreJunctions; 89 Genome *g; 90 string convFile; 91 vector<array<uint64,3>> convBlocks; 92 uint64 nMinusStrandOffset;//offset for the (-) strand, typically=nGenomeReal 93 } genomeOut; 94 95 typedef struct { 96 uint64 pos; 97 int32 len;//0: SNV, <0: deletion; >0: insertion 98 array<string,2> seq;//sequence for SNV and insertions, empty for deletions 99 } VariantInfo; 100 101 void transformGenome(GTF *gtf) ; 102 void transformChrLenStart(map<string,vector<VariantInfo>> &vcfVariants, vector<uint64> &chrStart1, vector<uint64> &chrLength1); 103 void transformGandBlocks(map<string,vector<VariantInfo>> &vcfVariants, vector<uint64> &chrStart1, vector<uint64> &chrLength1, vector<array<uint64,3>> &transformBlocks, char *Gnew); 104 void transformBlocksWrite(vector<array<uint64,3>> &transformBlocks); 105 void transformExonLoci(vector<array<uint64,exL>> &exonLoci, vector<array<uint64,3>> &transformBlocks); 106 }; 107 #endif 108