1 #include "Chain.h"
2 #include "streamFuns.h"
3 #include "serviceFuns.cpp"
4
Chain(Parameters & Pin,string chainFileNameIn)5 Chain::Chain(Parameters &Pin, string chainFileNameIn) : P(Pin), chainFileName(chainFileNameIn)
6 {
7 chainLoad();
8 };
9
chainLoad()10 void Chain::chainLoad()
11 {
12 ifstream &streamIn = ifstrOpen(chainFileName, ERROR_OUT, "SOLUTION: check path and permission for the chain file" + chainFileName, P);
13
14 string chr1;//current chromsome 1 (old)
15
16 while (streamIn.good())
17 {
18 string line1;
19 getline(streamIn,line1);
20 istringstream line1str(line1);
21
22 vector <string> fields(13);
23
24 for (int ii=0;ii<4;ii++)
25 line1str >> fields[ii];
26 if (fields[0]=="")
27 {//empty line, continue
28 } else if (fields[1]=="")
29 {//end of chain
30 chrChains[chr1].bLen.push_back(std::stoi(fields[0]));//read the last block length
31 chrChains[chr1].bN=chrChains[chr1].bLen.size();
32 } else if (fields[3]=="")
33 {//normal chain block
34 chrChains[chr1].bLen.push_back(std::stoi(fields[0]));
35
36 uint s=chrChains[chr1].bStart1.back() + chrChains[chr1].bLen.back() + std::stoi(fields[1]);//prev start + length + shift
37 chrChains[chr1].bStart1.push_back(s);
38
39 s=chrChains[chr1].bStart2.back() + chrChains[chr1].bLen.back() + std::stoi(fields[2]);//prev start + length + shift
40 chrChains[chr1].bStart2.push_back(s);
41 } else
42 {//chain header
43 //chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id
44 // 0 1 2 3 4 5 6 7 8 9 10 11 12
45
46 for (int ii=4;ii<13;ii++)
47 line1str >> fields[ii]; //read all the fields
48
49 chr1=fields[2];
50 chrChains[chr1].chr1=chr1;
51 chrChains[chr1].chr2=fields[7];//NOTE: the whole procedure (for now) only works for single chain per chromosome
52 chrChains[chr1].bStart1.push_back(std::stoi(fields[5]));
53 chrChains[chr1].bStart2.push_back(std::stoi(fields[10]));
54 };
55 };
56 };
57
liftOverGTF(string gtfFileName,string outFileName)58 void Chain::liftOverGTF(string gtfFileName, string outFileName)
59 {//simple arithmetic lift-over of the GTF file
60 ifstream &streamIn = ifstrOpen(gtfFileName, ERROR_OUT, "SOLUTION: check path and permission for the GTF file" + gtfFileName, P);
61 ofstream &streamOut = ofstrOpen(outFileName, ERROR_OUT, P);
62 ofstream &streamOutUnlifted = ofstrOpen(outFileName+".unlifted", ERROR_OUT, P);
63
64 while (streamIn.good())
65 {
66 string line1;
67 getline(streamIn,line1);
68 istringstream line1str(line1);
69
70 string chr1;
71 line1str >> chr1;
72
73 if (chr1=="" || chr1.substr(0,1)=="#")
74 continue;//empty or comment line
75
76 if (chrChains.count(chr1)==0)
77 exitWithError("GTF contains chromosome " + chr1 + " not present in the chain file " + chainFileName,std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
78
79 OneChain *ch1 = & chrChains[chr1];//the chain for the chr1
80
81 string str1,str2;
82 line1str >> str1 >> str2;//fields 2,3
83
84 uint c1, c2[2]; //coordinates: 1/2 (old/new)
85
86 for (int ii=0;ii<2;ii++)
87 {//read and transform the two coordinates
88 line1str >> c1;
89 int32 i1 = binarySearch1a <uint> (c1, ch1->bStart1.data(), ch1->bN);
90
91 c2[ii]=-1;//-1 means impossible to lift this end
92
93 if (i1>=0 && c1 < ch1->bStart1[i1]+ch1->bLen[i1])
94 {//c1 is inside the block, simple transformation
95 c2[ii]=ch1->bStart2[i1] + c1 - ch1->bStart1[i1];
96 } else
97 {//c1 is outside of the block
98 if (ii==0 && i1 < (int32) ch1->bN-1)
99 {//left end => c2 will be at the start of the next block
100 c2[ii]=ch1->bStart2[i1+1]; //if i1=-1, it will work = start of the 0-tn blocl
101 } else if (ii==1 && i1 >= 0)
102 {
103 c2[ii]=ch1->bStart2[i1]+ch1->bLen[i1]-1;
104 };
105 };
106 };
107 if (c2[0]!=-1llu && c2[1]!=-1llu && c2[1]>=c2[0])
108 {//good conversion
109 streamOut << ch1->chr2 <<"\t"<< str1 <<"\t"<< str2 <<"\t"<<c2[0] <<"\t"<< c2[1] << line1str.rdbuf() << "\n";
110 } else {
111 streamOutUnlifted << line1 <<"\n";
112 };
113 };
114 streamOutUnlifted.close();
115 streamOut.close();
116 };
117
118 // if (mapGen.chrNameIndex.count(oldname))
119 // {
120 // ostringstream errOut;
121 // errOut << "EXITING because of fatal INPUT file error: chain file contains chromosome (scaffold) not present in the genome " << oldname <<"\n";
122 // errOut << ERROR_OUT << "\n";
123 // exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
124 // };
125 // uint ichr=mapGen.chrNameIndex[oldname];//chr index in the genome list
126 // bStart1[bN] += mapGen.chrLength[ichr];//whole genome chain - shift by chr start
127