1 #ifndef H_Genome
2 #define H_Genome
3 
4 #include "IncludeDefine.h"
5 #include "Parameters.h"
6 #include "PackedArray.h"
7 #include "SharedMemory.h"
8 #include "Variation.h"
9 #include "SuperTranscriptome.h"
10 
11 class GTF;
12 
13 class Genome {
14 private:
15     key_t shmKey;
16     char *shmStart;
17     uint OpenStream(string name, ifstream & stream, uint size);
18     void HandleSharedMemoryException(const SharedMemoryException & exc, uint64 shmSize);
19 public:
20     Parameters &P;
21     ParametersGenome &pGe;
22     SharedMemory *sharedMemory;
23 
24     enum {exT,exS,exE,exG,exL}; //indexes in the exonLoci array from GTF
25 
26     char *G, *G1;
27     uint64 nGenome, nG1alloc;
28     PackedArray SA,SAinsert,SApass1,SApass2;
29     PackedArray SAi;
30     Variation *Var;
31 
32     uint nGenomeInsert, nGenomePass1, nGenomePass2, nSAinsert, nSApass1, nSApass2;
33 
34 
35     //chr parameters
36     vector <uint64> chrStart, chrLength, chrLengthAll;
37     uint genomeChrBinNbases, chrBinN, *chrBin;
38     vector <string> chrName, chrNameAll;
39     map <string,uint64> chrNameIndex;
40 
41     uint *genomeSAindexStart;//starts of the L-mer indices in the SAindex, 1<=L<=pGe.gSAindexNbases
42 
43     uint nSA, nSAbyte, nChrReal;//genome length, SA length, # of chromosomes, vector of chromosome start loci
44     uint nGenome2, nSA2, nSAbyte2, nChrReal2; //same for the 2nd pass
45     uint nSAi; //size of the SAindex
46     unsigned char GstrandBit, SAiMarkNbit, SAiMarkAbsentBit; //SA index bit for strand information
47     uint GstrandMask, SAiMarkAbsentMask, SAiMarkAbsentMaskC, SAiMarkNmask, SAiMarkNmaskC;//maske to remove strand bit from SA index, to remove mark from SAi index
48 
49     //SJ database parameters
50     uint sjdbOverhang, sjdbLength; //length of the donor/acceptor, length of the sj "chromosome" =2*pGe.sjdbOverhang+1 including spacer
51     uint sjChrStart,sjdbN; //first sj-db chr
52     uint sjGstart; //start of the sj-db genome sequence
53     uint *sjDstart,*sjAstart,*sjStr, *sjdbStart, *sjdbEnd; //sjdb loci
54     uint8 *sjdbMotif; //motifs of annotated junctions
55     uint8 *sjdbShiftLeft, *sjdbShiftRight; //shifts of junctions
56     uint8 *sjdbStrand; //junctions strand, not used yet
57 
58    //sequence insert parameters
59     uint genomeInsertL; //total length of the sequence to be inserted on the fly
60     uint genomeInsertChrIndFirst; //index of the first inserted chromosome
61 
62     //SuperTranscriptome genome
63     SuperTranscriptome *superTr;
64 
65     Genome (Parameters &P, ParametersGenome &pGe);
66     //~Genome();
67 
68     void freeMemory();
69     void genomeLoad();
70     void genomeOutLoad();
71     void chrBinFill();
72     void chrInfoLoad();
73     void genomeSequenceAllocate(uint64 nGenomeIn, uint64 &nG1allocOut, char*& Gout, char*& G1out);
74     void loadSJDB(string &genDir);
75 
76     void insertSequences();
77 
78     //void consensusSequence(); DEPRECATED
79 
80     void genomeGenerate();
81     void writeChrInfo(const string dirOut);
82     void concatenateChromosomes(const vector<vector<uint8>> &vecSeq, const vector<string> &vecName, const uint64 padBin);
83     void writeGenomeSequence(const string dirOut);
84 
85     //transform genome coordinates
86     struct {
87         bool convYes;
88         bool gapsAreJunctions;
89         Genome *g;
90         string convFile;
91         vector<array<uint64,3>> convBlocks;
92         uint64 nMinusStrandOffset;//offset for the (-) strand, typically=nGenomeReal
93     } genomeOut;
94 
95     typedef struct {
96         uint64 pos;
97         int32 len;//0: SNV, <0: deletion; >0: insertion
98         array<string,2> seq;//sequence for SNV and insertions, empty for deletions
99     } VariantInfo;
100 
101     void transformGenome(GTF *gtf) ;
102     void transformChrLenStart(map<string,vector<VariantInfo>> &vcfVariants, vector<uint64> &chrStart1, vector<uint64> &chrLength1);
103     void transformGandBlocks(map<string,vector<VariantInfo>> &vcfVariants, vector<uint64> &chrStart1, vector<uint64> &chrLength1, vector<array<uint64,3>> &transformBlocks, char *Gnew);
104     void transformBlocksWrite(vector<array<uint64,3>> &transformBlocks);
105     void transformExonLoci(vector<array<uint64,exL>> &exonLoci, vector<array<uint64,3>> &transformBlocks);
106 };
107 #endif
108