package prok;

import java.util.ArrayList;

import fileIO.ByteFile;
import shared.Parse;
import shared.Tools;

/**
 * Parses a gene model file into a {@link GeneModel}.
 * The constructor reads every line of the file into memory; parsing then walks those
 * lines with a cursor ({@link #pos}). File-level header lines come first, followed by
 * StatsContainer sections, each of which is followed by three FrameStats sections.
 */
public class GeneModelParser {

	/**
	 * Reads all lines of the file via {@link ByteFile#toLines} and creates the
	 * {@link GeneModel} instance that {@link #parse()} fills in.
	 * @param fname_ Path of the gene model file.
	 */
	GeneModelParser(String fname_){
		fname=fname_;
		lines=ByteFile.toLines(fname);
		gm=new GeneModel(false);
	}

	/** @return true while the cursor has not passed the last loaded line. */
	boolean hasMore(){
		return pos<lines.size();
	}

	/**
	 * Returns the line under the cursor and advances the cursor by one.
	 * @return The next line, or null when all lines have been consumed.
	 */
	byte[] nextLine(){
		if(pos>=lines.size()){return null;}
		byte[] line=lines.get(pos);
		pos++;
		return line;
	}

	/** Path of the file being parsed. */
	final String fname;
	/** Every line of the file, loaded by the constructor. */
	final ArrayList<byte[]> lines;
	/** The model populated by this parser. */
	private final GeneModel gm;
	/** Index into {@link #lines} of the next line to return; decremented to "unread" a line. */
	int pos=0;

	/*--------------------------------------------------------------*/
	/*----------------           Parsing            ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Convenience entry point: constructs a parser for the file and parses it.
	 * @param fname Path of the gene model file.
	 * @return The populated model.
	 */
	public static GeneModel loadModel(String fname) {
		GeneModelParser gmp=new GeneModelParser(fname);
		return gmp.parse();
	}

	/**
	 * Parses the loaded lines: header lines until {@link #parseHeader} rejects one,
	 * then containers until the lines are exhausted. Each parsed container is copied
	 * into gm.allContainers at the index given by its type.
	 * @return The populated model.
	 */
	private GeneModel parse(){
		while(hasMore()){
			byte[] line=nextLine();
			boolean valid=parseHeader(line);
			if(!valid){
				pos--;//Unread the first non-header line so the container loop sees it
				break;
			}
		}//Done parsing headers

		ArrayList<StatsContainer> containers=new ArrayList<StatsContainer>();
		while(hasMore()){
			StatsContainer sc=parseContainer();
			if(sc!=null){
				containers.add(sc);
			}else{
				assert(false);
			}
		}

		assert(containers.size()==6) : containers.size();
		for(StatsContainer sc : containers){
			gm.allContainers[sc.type].setFrom(sc);
		}
//		gm.statsCDS.setFrom(containers.get(0);
//		gm.statstRNA=containers.get(1);
//		gm.stats16S=containers.get(2);
//		gm.stats23S=containers.get(3);
//		gm.stats5S=containers.get(4);
//		gm.stats18S=containers.get(5);

//		gm.stats16S.minIdentity=ProkObject.min16SIdentity;
//		gm.stats23S.minIdentity=ProkObject.min23SIdentity;
//		gm.stats18S.minIdentity=ProkObject.min18SIdentity;

		gm.setStatics();

		return gm;
	}

	/**
	 * Parses one container section starting at the cursor.
	 * Reads '#'-prefixed attribute lines (#name, #type, #count, #lengthSum) until a
	 * "#contains" line or a line not starting with '#' (which is unread), then parses
	 * three FrameStats sections and assigns them, in order, as inner, start and stop.
	 * @return The assembled container.
	 */
	private StatsContainer parseContainer(){
		String name=null;
		int type=-1;
		long lengthCount=0;
		long lengthSum=0;
		for(byte[] line=nextLine(); line!=null; line=nextLine()){
			if(line[0]!='#'){
				pos--;
				break;
			}

			if(Tools.startsWith(line, "##")){
				//ignore
			}else if(Tools.startsWith(line, "#name")){
				name=parseString(line);
			}else if(Tools.startsWith(line, "#type")){
				type=parseInt(line);
			}else if(Tools.startsWith(line, "#count")){
				lengthCount=parseLong(line);
			}else if(Tools.startsWith(line, "#lengthSum")){
				lengthSum=parseLong(line);
			}else if(Tools.startsWith(line, "#contains")){
				break;
			}else{
				assert(false) : new String(line);
			}
		}

		ArrayList<FrameStats> list=new ArrayList<FrameStats>(3);
		for(int i=0; i<3; i++){
			FrameStats fs=parseStats();
			list.add(fs);
		}

		StatsContainer sc=new StatsContainer(type);
		//The name stored in the file must agree with the name the container derives from its type
		assert(sc.name.equals(name)) : name+", "+sc.name+", "+type;
		sc.lengthCount=lengthCount;
		sc.lengthSum=lengthSum;

		sc.setInner(list.get(0));
		sc.setStart(list.get(1));
		sc.setStop(list.get(2));

		sc.calculate();
		assert(sc.inner!=null);
		return sc;
	}

	/**
	 * Parses one FrameStats section starting at the cursor.
	 * Reads '#'-prefixed attribute lines (#name, #k, #frames, #offset) until the
	 * "#valid\tframe" line; other '#' lines are skipped without complaint.
	 * Then constructs the FrameStats and hands it the next 2*frames lines via parseData.
	 * @return The populated FrameStats.
	 */
	private FrameStats parseStats(){
		String name=null;
		int k=-1, frames=-1, offset=-1;
		for(byte[] line=nextLine(); line!=null; line=nextLine()){
			if(line[0]!='#'){
				pos--;
				//A data line before the "#valid\tframe" column header is unexpected here
				assert(false) : new String(line);
				break;
			}

			if(Tools.startsWith(line, "##")){
				//ignore
			}else if(Tools.startsWith(line, "#name")){
				name=parseString(line);
			}else if(Tools.startsWith(line, "#k")){
				k=parseInt(line);
			}else if(Tools.startsWith(line, "#frames")){
				frames=parseInt(line);
			}else if(Tools.startsWith(line, "#offset")){
				offset=parseInt(line);
			}else if(Tools.startsWith(line, "#valid\tframe")){
				break;
			}
		}

		FrameStats fs=new FrameStats(name, k, frames, offset);

		for(int i=0, max=2*fs.frames; i<max; i++){
			byte[] line=nextLine();
			fs.parseData(line);
		}
		return fs;
	}

	/**
	 * @param line A line containing a tab.
	 * @return Everything after the tab located by Tools.indexOf, as a String.
	 */
	private static String parseString(byte[] line){
		int idx=Tools.indexOf(line, '\t');
		String s=new String(line, idx+1, line.length-idx-1);
		return s;
	}

	/**
	 * @param line A line containing a tab.
	 * @return The text after the tab located by Tools.indexOf, parsed as an int.
	 */
	private static int parseInt(byte[] line){
		int idx=Tools.indexOf(line, '\t');
		return Parse.parseInt(line, idx+1, line.length);
	}

	/**
	 * @param line A line containing a tab.
	 * @return The text after the tab located by Tools.indexOf, parsed as a long.
	 */
	private static long parseLong(byte[] line){
		int idx=Tools.indexOf(line, '\t');
		return Parse.parseLong(line, idx+1, line.length);
	}

//	public static void parseHeaderStatic(byte[] line){
//
//		assert(line[0]=='#');
//		if(Tools.startsWith(line, "#k_inner")){
//			int x=(int)parseLong(line);
//			assert(x==innerKmerLength);
//			setInnerK(x);
//		}else if(Tools.startsWith(line, "#k_end")){
//			int x=(int)parseLong(line);
//			assert(x==endKmerLength);
//			setEndK(x);
//		}else if(Tools.startsWith(line, "#start_left_offset")){
//			int x=(int)parseLong(line);
//			assert(x==startLeftOffset);
//			setStartLeftOffset(x);
//		}else if(Tools.startsWith(line, "#start_right_offset")){
//			int x=(int)parseLong(line);
//			assert(x==startRightOffset);
//			setStartRightOffset(x);
//		}else if(Tools.startsWith(line, "#stop_left_offset")){
//			int x=(int)parseLong(line);
//			assert(x==stopLeftOffset);
//			setStopLeftOffset(x);
//		}else if(Tools.startsWith(line, "#stop_right_offset")){
//			int x=(int)parseLong(line);
//			assert(x==stopRightOffset);
//			setStopRightOffset(x);
//		}
//	}

	/**
	 * Interprets one file-level header line and applies it to the model.
	 * Recognized prefixes: #BBMap, ##, #GC (all ignored), #files, #taxIDs, #scaffolds,
	 * #bases, #genes and #ACGTN.
	 * @param line The line to interpret.
	 * @return true if the line was a recognized header; false if it does not start
	 * with '#' or has an unrecognized prefix, which ends the header section.
	 */
	public boolean parseHeader(byte[] line){
		if(line[0]!='#'){return false;}

		if(Tools.startsWith(line, "#BBMap")){
			//ignore
		}else if(Tools.startsWith(line, "##")){
			//ignore
		}else if(Tools.startsWith(line, "#files")){//Not necessary
			String[] split=new String(line).split("\t");
			//A bare "#files" line (or one with only a trailing tab) has no value field;
			//it contributes zero files instead of failing on split[1].
			if(split.length>1){
				try {
					gm.numFiles+=Integer.parseInt(split[1]);
				} catch (NumberFormatException e) {
					gm.numFiles+=split.length-1;//old style pgm
				}
			}
//			for(String s : new String(line).split("\t")){
//				if(s.charAt(0)!='#'){
//					gm.fnames.add(s);
//				}
//			}
		}else if(Tools.startsWith(line, "#taxIDs")){//Can be made faster
			for(String s : new String(line).split("\t")){
				//Consecutive tabs yield empty tokens; skip them rather than fail on charAt(0)
				if(!s.isEmpty() && s.charAt(0)!='#'){
					gm.taxIds.add(Integer.parseInt(s));
				}
			}
		}else if(Tools.startsWith(line, "#scaffolds")){
			long x=parseLong(line);
			gm.readsProcessed=x;
		}else if(Tools.startsWith(line, "#bases")){
			long x=parseLong(line);
			gm.basesProcessed=x;
		}else if(Tools.startsWith(line, "#genes")){
			long x=parseLong(line);
			gm.genesProcessed=x;
		}else if(Tools.startsWith(line, "#GC")){
			//ignore
		}else if(Tools.startsWith(line, "#ACGTN")){
			String[] split=new String(line).split("\t");
			for(int i=0; i<gm.baseCounts.length; i++){
				gm.baseCounts[i]=Long.parseLong(split[i+1]);
			}
		}else{
			return false;
		}
		return true;
	}

}