1 #include "Chain.h"
2 #include "streamFuns.h"
3 #include "serviceFuns.cpp"
4 
Chain(Parameters & Pin,string chainFileNameIn)5 Chain::Chain(Parameters &Pin, string chainFileNameIn) : P(Pin), chainFileName(chainFileNameIn)
6 {
7     chainLoad();
8 };
9 
chainLoad()10 void Chain::chainLoad()
11 {
12     ifstream &streamIn = ifstrOpen(chainFileName, ERROR_OUT, "SOLUTION: check path and permission for the chain file" + chainFileName, P);
13 
14     string chr1;//current chromsome 1 (old)
15 
16     while (streamIn.good())
17     {
18         string line1;
19         getline(streamIn,line1);
20         istringstream line1str(line1);
21 
22         vector <string>  fields(13);
23 
24         for (int ii=0;ii<4;ii++)
25             line1str >> fields[ii];
26         if (fields[0]=="")
27         {//empty line, continue
28         } else if (fields[1]=="")
29         {//end of chain
30             chrChains[chr1].bLen.push_back(std::stoi(fields[0]));//read the last block length
31             chrChains[chr1].bN=chrChains[chr1].bLen.size();
32         } else if (fields[3]=="")
33         {//normal chain block
34             chrChains[chr1].bLen.push_back(std::stoi(fields[0]));
35 
36             uint s=chrChains[chr1].bStart1.back() + chrChains[chr1].bLen.back() + std::stoi(fields[1]);//prev start + length + shift
37             chrChains[chr1].bStart1.push_back(s);
38 
39             s=chrChains[chr1].bStart2.back() + chrChains[chr1].bLen.back() + std::stoi(fields[2]);//prev start + length + shift
40             chrChains[chr1].bStart2.push_back(s);
41         } else
42         {//chain header
43             //chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id
44             //    0     1     2     3     4       5      6    7     8     9       10   11 12
45 
46             for (int ii=4;ii<13;ii++)
47                 line1str >> fields[ii]; //read all the fields
48 
49             chr1=fields[2];
50             chrChains[chr1].chr1=chr1;
51             chrChains[chr1].chr2=fields[7];//NOTE: the whole procedure (for now) only works for single chain per chromosome
52             chrChains[chr1].bStart1.push_back(std::stoi(fields[5]));
53             chrChains[chr1].bStart2.push_back(std::stoi(fields[10]));
54         };
55     };
56 };
57 
liftOverGTF(string gtfFileName,string outFileName)58 void Chain::liftOverGTF(string gtfFileName, string outFileName)
59 {//simple arithmetic lift-over of the GTF file
60     ifstream &streamIn = ifstrOpen(gtfFileName, ERROR_OUT, "SOLUTION: check path and permission for the GTF file" + gtfFileName, P);
61     ofstream &streamOut = ofstrOpen(outFileName, ERROR_OUT, P);
62     ofstream &streamOutUnlifted = ofstrOpen(outFileName+".unlifted", ERROR_OUT, P);
63 
64     while (streamIn.good())
65     {
66         string line1;
67         getline(streamIn,line1);
68         istringstream line1str(line1);
69 
70         string chr1;
71         line1str >> chr1;
72 
73         if (chr1=="" || chr1.substr(0,1)=="#")
74             continue;//empty or comment line
75 
76         if (chrChains.count(chr1)==0)
77             exitWithError("GTF contains chromosome " + chr1 + " not present in the chain file " + chainFileName,std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
78 
79         OneChain *ch1 = & chrChains[chr1];//the chain for the chr1
80 
81         string str1,str2;
82         line1str >> str1 >> str2;//fields 2,3
83 
84         uint c1, c2[2]; //coordinates: 1/2 (old/new)
85 
86         for (int ii=0;ii<2;ii++)
87         {//read and transform the two coordinates
88             line1str >> c1;
89             int32 i1 = binarySearch1a <uint> (c1, ch1->bStart1.data(), ch1->bN);
90 
91             c2[ii]=-1;//-1 means impossible to lift this end
92 
93             if (i1>=0 && c1 < ch1->bStart1[i1]+ch1->bLen[i1])
94             {//c1 is inside the block, simple transformation
95                 c2[ii]=ch1->bStart2[i1] + c1 - ch1->bStart1[i1];
96             } else
97             {//c1 is outside of the block
98                 if (ii==0 && i1 < (int32) ch1->bN-1)
99                 {//left end => c2 will be at the start of the next block
100                         c2[ii]=ch1->bStart2[i1+1]; //if i1=-1, it will work = start of the 0-tn blocl
101                 } else if (ii==1 && i1 >= 0)
102                 {
103                     c2[ii]=ch1->bStart2[i1]+ch1->bLen[i1]-1;
104                 };
105             };
106         };
107         if (c2[0]!=-1llu && c2[1]!=-1llu && c2[1]>=c2[0])
108         {//good conversion
109             streamOut << ch1->chr2 <<"\t"<< str1 <<"\t"<< str2 <<"\t"<<c2[0] <<"\t"<< c2[1] << line1str.rdbuf() << "\n";
110         } else {
111             streamOutUnlifted << line1 <<"\n";
112         };
113     };
114     streamOutUnlifted.close();
115     streamOut.close();
116 };
117 
118 //             if (mapGen.chrNameIndex.count(oldname))
119 //             {
120 //                 ostringstream errOut;
121 //                 errOut << "EXITING because of fatal INPUT file error: chain file contains chromosome (scaffold) not present in the genome " << oldname <<"\n";
122 //                 errOut << ERROR_OUT << "\n";
123 //                 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
124 //             };
125 //             uint ichr=mapGen.chrNameIndex[oldname];//chr index in the genome list
126 //             bStart1[bN] += mapGen.chrLength[ichr];//whole genome chain - shift by chr start
127