package stream;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

import dna.Data;
import fileIO.TextStreamWriter;
import shared.Shared;
import shared.Tools;
import structures.ByteBuilder;

/**
 * Static helpers that build the text of a SAM header: the {@code @HD} line,
 * one {@code @SQ} line per scaffold, and the optional {@code @RG} line
 * followed by the {@code @PG} line.
 * Each section is available in a StringBuilder flavor and a ByteBuilder
 * flavor (the latter has a "B" suffix).
 *
 * @author Brian Bushnell
 * @date Jul 7, 2014
 *
 */
public class SamHeader {

	/*--------------------------------------------------------------*/
	/*----------------            @HD line          ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Appends the {@code @HD} line (without a trailing newline) to bb.
	 * @param bb Destination buffer
	 * @return bb, to allow chaining
	 */
	public static ByteBuilder header0B(ByteBuilder bb){
		bb.append("@HD\tVN:");
		bb.append(versionString());
		bb.append("\tSO:unsorted");
		return bb;
	}

	/**
	 * Creates the {@code @HD} line (without a trailing newline).
	 * @return A new StringBuilder holding the line
	 */
	public static StringBuilder header0(){
		return new StringBuilder("@HD\tVN:"+versionString()+"\tSO:unsorted");
	}

	/** Returns the VN value to report: "1.3" when SamLine.VERSION is below 1.4, otherwise "1.4". */
	private static String versionString(){
		return SamLine.VERSION<1.4f ? "1.3" : "1.4";
	}

	/*--------------------------------------------------------------*/
	/*----------------           @SQ lines          ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Builds one newline-terminated {@code @SQ} line per scaffold.
	 * Chromosomes from minChrom through min(maxChrom, Data.numChroms) are visited.
	 * @param minChrom First chromosome index (inclusive)
	 * @param maxChrom Last chromosome index (inclusive)
	 * @param sort True to sort the finished lines with Shared.sort
	 * @return The list of lines
	 */
	static ArrayList<String> scaffolds(int minChrom, int maxChrom, boolean sort){
		final ArrayList<String> list=new ArrayList<String>(4000);
		final StringBuilder sb=new StringBuilder(1000);
		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
			final byte[][] inames=Data.scaffoldNames[chrom];
			for(int scaf=0; scaf<Data.chromScaffolds[chrom]; scaf++){
				appendSqLine(sb, inames, chrom, scaf);
				list.add(sb.toString());
				sb.setLength(0);//Reuse the builder for the next line
			}
		}
		if(sort){Shared.sort(list);}
		return list;
	}

	/**
	 * Builds all {@code @SQ} lines into a single StringBuilder.
	 * Lines are sorted first when SamLine.SORT_SCAFFOLDS is set;
	 * otherwise they appear in chromosome/scaffold order.
	 * @param minChrom First chromosome index (inclusive)
	 * @param maxChrom Last chromosome index (inclusive)
	 * @return A new StringBuilder holding the newline-terminated lines
	 */
	public static StringBuilder header1(int minChrom, int maxChrom){
		final StringBuilder sb=new StringBuilder(20000);
		if(SamLine.SORT_SCAFFOLDS){
			final ArrayList<String> lines=scaffolds(minChrom, maxChrom, true);
			for(int i=0; i<lines.size(); i++){
				sb.append(lines.set(i, null));//Null the slot so each line can be collected once copied
			}
			return sb;
		}

		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
			final byte[][] inames=Data.scaffoldNames[chrom];
			for(int scaf=0; scaf<Data.chromScaffolds[chrom]; scaf++){
				appendSqLine(sb, inames, chrom, scaf);
			}
		}
		return sb;
	}

	/**
	 * Prints all {@code @SQ} lines to a PrintWriter, one print call per line.
	 * Lines are sorted first when SamLine.SORT_SCAFFOLDS is set.
	 * @param minChrom First chromosome index (inclusive)
	 * @param maxChrom Last chromosome index (inclusive)
	 * @param pw Destination writer; it is neither flushed nor closed here
	 */
	public static void printHeader1(int minChrom, int maxChrom, PrintWriter pw){
		if(SamLine.SORT_SCAFFOLDS){
			final ArrayList<String> lines=scaffolds(minChrom, maxChrom, true);
			for(int i=0; i<lines.size(); i++){
				pw.print(lines.set(i, null));
			}
			return;
		}

		final StringBuilder sb=new StringBuilder(256);
		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
			final byte[][] inames=Data.scaffoldNames[chrom];
			for(int scaf=0; scaf<Data.chromScaffolds[chrom]; scaf++){
				appendSqLine(sb, inames, chrom, scaf);
				pw.print(sb);//PrintWriter.print(Object) converts to a String immediately, so reuse is safe
				sb.setLength(0);
			}
		}
	}

	/**
	 * Appends all {@code @SQ} lines to bb, writing bb to os and clearing it
	 * whenever it reaches FLUSH_THRESHOLD bytes.
	 * Bytes still held in bb when this method returns are NOT written here;
	 * the caller is responsible for them.
	 * Lines are sorted first when SamLine.SORT_SCAFFOLDS is set.
	 * @param minChrom First chromosome index (inclusive)
	 * @param maxChrom Last chromosome index (inclusive)
	 * @param bb Working buffer; may already contain data, which is written along with the lines
	 * @param os Destination stream
	 * @throws RuntimeException wrapping any IOException raised by os.write
	 */
	public static void printHeader1B(int minChrom, int maxChrom, ByteBuilder bb, OutputStream os){
		if(verbose){System.err.println("printHeader1B("+minChrom+", "+maxChrom+")");}

		if(SamLine.SORT_SCAFFOLDS){
			if(verbose){System.err.println("Sorting scaffolds");}
			final ArrayList<String> lines=scaffolds(minChrom, maxChrom, true);
			for(int i=0; i<lines.size(); i++){
				final String line=lines.set(i, null);
				bb.append(line);
				flushIfFull(bb, os);
			}
			return;
		}

		if(verbose){System.err.println("Iterating over chroms");}
		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
			final byte[][] inames=Data.scaffoldNames[chrom];
			final int numScafs=Data.chromScaffolds[chrom];
			assert(inames.length==numScafs) : "Mismatch between number of scaffolds and names for chrom "+chrom+": "+inames.length+" != "+numScafs;
			for(int scaf=0; scaf<numScafs; scaf++){
				final byte[] scafName=inames[scaf];
				bb.append("@SQ\tSN:");
				if(scafName==null){
					assert(false) : "scaffoldName["+chrom+"]["+scaf+"] = null";
					//Emit the same literal as the StringBuilder variants instead of passing a null array
					bb.append("null");
				}else{
					appendScafName(bb, scafName);
				}
				bb.append("\tLN:");
				bb.append(Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[chrom][scaf])));
				bb.nl();
				flushIfFull(bb, os);
			}
		}
	}

	/**
	 * Prints all {@code @SQ} lines to a TextStreamWriter, one print call per line.
	 * Lines are sorted first when SamLine.SORT_SCAFFOLDS is set.
	 * @param minChrom First chromosome index (inclusive)
	 * @param maxChrom Last chromosome index (inclusive)
	 * @param tsw Destination writer
	 */
	public static void printHeader1(int minChrom, int maxChrom, TextStreamWriter tsw){
		if(SamLine.SORT_SCAFFOLDS){
			final ArrayList<String> lines=scaffolds(minChrom, maxChrom, true);
			for(int i=0; i<lines.size(); i++){
				tsw.print(lines.set(i, null));
			}
			return;
		}

		final StringBuilder sb=new StringBuilder(256);
		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
			final byte[][] inames=Data.scaffoldNames[chrom];
			for(int scaf=0; scaf<Data.chromScaffolds[chrom]; scaf++){
				appendSqLine(sb, inames, chrom, scaf);
				//Pass an immutable snapshot because the builder is cleared right after.
				//NOTE(review): presumably the writer may queue the sequence rather than copy it - confirm.
				tsw.print(sb.toString());
				sb.setLength(0);
			}
		}
	}

	/**
	 * Appends one newline-terminated {@code @SQ} line for a single scaffold.
	 * A null name trips an assertion; with assertions disabled the literal "null" is written.
	 * The length field is Data.scaffoldLengths[chrom][scaf], capped at Integer.MAX_VALUE.
	 * @param sb Destination buffer
	 * @param names Scaffold names of the chromosome, i.e. Data.scaffoldNames[chrom]
	 * @param chrom Chromosome index
	 * @param scaf Scaffold index within the chromosome
	 */
	private static void appendSqLine(StringBuilder sb, byte[][] names, int chrom, int scaf){
		final byte[] scn=names[scaf];
		sb.append("@SQ\tSN:");
		if(scn==null){
			assert(false) : "scaffoldName["+chrom+"]["+scaf+"] = null";
			sb.append("null");
		}else{
			appendScafName(sb, scn);
		}
		sb.append("\tLN:").append(Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[chrom][scaf])));
		sb.append('\n');
	}

	/**
	 * Writes the contents of bb to os and clears bb, but only once bb
	 * holds at least FLUSH_THRESHOLD bytes.
	 * @param bb Buffer to drain
	 * @param os Destination stream
	 * @throws RuntimeException wrapping any IOException raised by os.write
	 */
	private static void flushIfFull(ByteBuilder bb, OutputStream os){
		if(bb.length<FLUSH_THRESHOLD){return;}
		try {
			os.write(bb.array, 0, bb.length);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
		bb.setLength(0);
	}

	/*--------------------------------------------------------------*/
	/*----------------        Scaffold names        ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Appends a scaffold name to sb, one char per byte.
	 * When Data.scaffoldPrefixes is set, everything up to and including the
	 * first '$' is skipped; if the name contains no '$', nothing is appended.
	 * @param sb Destination buffer
	 * @param scn Scaffold name bytes; must not be null
	 */
	static void appendScafName(StringBuilder sb, byte[] scn){
		if(Data.scaffoldPrefixes){
			int k=0;
			while(k<scn.length && scn[k]!='$'){k++;}
			for(k++; k<scn.length; k++){//Step past the '$' itself, then copy the remainder
				sb.append((char)scn[k]);
			}
		}else{
			final char[] buffer=Shared.getTLCB(scn.length);
			for(int i=0; i<scn.length; i++){buffer[i]=(char)scn[i];}
			sb.append(buffer, 0, scn.length);
		}
	}

	/**
	 * Appends a scaffold name to sb as raw bytes.
	 * When Data.scaffoldPrefixes is set, everything up to and including the
	 * first '$' is skipped; if the name contains no '$', nothing is appended.
	 * @param sb Destination buffer
	 * @param scn Scaffold name bytes; must not be null
	 */
	static void appendScafName(ByteBuilder sb, byte[] scn){
		if(Data.scaffoldPrefixes){
			int k=0;
			while(k<scn.length && scn[k]!='$'){k++;}
			for(k++; k<scn.length; k++){//Step past the '$' itself, then copy the remainder
				sb.append(scn[k]);
			}
		}else{
			sb.append(scn);
		}
	}

	/*--------------------------------------------------------------*/
	/*----------------       @RG and @PG lines      ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Creates the trailing header section: a newline-terminated {@code @RG}
	 * line (only when SamLine.READGROUP_ID is set), then the {@code @PG} line.
	 * When Shared.BBMAP_CLASS is set, the {@code @PG} line also gets a CL field
	 * made of "java", the JVM arguments, the class name and the command line.
	 * No newline is appended after the {@code @PG} line.
	 * @return A new StringBuilder holding the section
	 */
	public static StringBuilder header2(){
		final StringBuilder sb=new StringBuilder(1000);

		//A default @RG line is deliberately not emitted: it can cause problems,
		//because if RG is in the header, reads may need extra fields.
		if(SamLine.READGROUP_ID!=null){
			sb.append("@RG\tID:").append(SamLine.READGROUP_ID);
			if(SamLine.READGROUP_CN!=null){sb.append("\tCN:").append(SamLine.READGROUP_CN);}
			if(SamLine.READGROUP_DS!=null){sb.append("\tDS:").append(SamLine.READGROUP_DS);}
			if(SamLine.READGROUP_DT!=null){sb.append("\tDT:").append(SamLine.READGROUP_DT);}
			if(SamLine.READGROUP_FO!=null){sb.append("\tFO:").append(SamLine.READGROUP_FO);}
			if(SamLine.READGROUP_KS!=null){sb.append("\tKS:").append(SamLine.READGROUP_KS);}
			if(SamLine.READGROUP_LB!=null){sb.append("\tLB:").append(SamLine.READGROUP_LB);}
			if(SamLine.READGROUP_PG!=null){sb.append("\tPG:").append(SamLine.READGROUP_PG);}
			if(SamLine.READGROUP_PI!=null){sb.append("\tPI:").append(SamLine.READGROUP_PI);}
			if(SamLine.READGROUP_PL!=null){sb.append("\tPL:").append(SamLine.READGROUP_PL);}
			if(SamLine.READGROUP_PU!=null){sb.append("\tPU:").append(SamLine.READGROUP_PU);}
			if(SamLine.READGROUP_SM!=null){sb.append("\tSM:").append(SamLine.READGROUP_SM);}
			sb.append('\n');
		}

		sb.append("@PG\tID:BBMap\tPN:BBMap\tVN:");
		sb.append(Shared.BBMAP_VERSION_STRING);

		if(Shared.BBMAP_CLASS!=null){
			sb.append("\tCL:java");
			final List<String> jvmArgs=Shared.JVM_ARGS();
			if(jvmArgs!=null){
				for(String s : jvmArgs){
					sb.append(' ');
					sb.append(s);
				}
			}
			sb.append(" align2."+Shared.BBMAP_CLASS);
			if(Shared.COMMAND_LINE!=null){
				for(String s : Shared.COMMAND_LINE){
					sb.append(' ');
					sb.append(s);
				}
			}
		}

		return sb;
	}

	/**
	 * ByteBuilder flavor of the trailing header section: a newline-terminated
	 * {@code @RG} line (only when SamLine.READGROUP_ID is set), then the
	 * {@code @PG} line, optionally with a CL field when Shared.BBMAP_CLASS is set.
	 * No newline is appended after the {@code @PG} line.
	 * @param sb Destination buffer
	 * @return sb, to allow chaining
	 */
	public static ByteBuilder header2B(ByteBuilder sb){

		if(SamLine.READGROUP_ID!=null){
			sb.append("@RG\tID:").append(SamLine.READGROUP_ID);
			if(SamLine.READGROUP_CN!=null){sb.append("\tCN:").append(SamLine.READGROUP_CN);}
			if(SamLine.READGROUP_DS!=null){sb.append("\tDS:").append(SamLine.READGROUP_DS);}
			if(SamLine.READGROUP_DT!=null){sb.append("\tDT:").append(SamLine.READGROUP_DT);}
			if(SamLine.READGROUP_FO!=null){sb.append("\tFO:").append(SamLine.READGROUP_FO);}
			if(SamLine.READGROUP_KS!=null){sb.append("\tKS:").append(SamLine.READGROUP_KS);}
			if(SamLine.READGROUP_LB!=null){sb.append("\tLB:").append(SamLine.READGROUP_LB);}
			if(SamLine.READGROUP_PG!=null){sb.append("\tPG:").append(SamLine.READGROUP_PG);}
			if(SamLine.READGROUP_PI!=null){sb.append("\tPI:").append(SamLine.READGROUP_PI);}
			if(SamLine.READGROUP_PL!=null){sb.append("\tPL:").append(SamLine.READGROUP_PL);}
			if(SamLine.READGROUP_PU!=null){sb.append("\tPU:").append(SamLine.READGROUP_PU);}
			if(SamLine.READGROUP_SM!=null){sb.append("\tSM:").append(SamLine.READGROUP_SM);}
			sb.append('\n');
		}

		sb.append("@PG\tID:BBMap\tPN:BBMap\tVN:");
		sb.append(Shared.BBMAP_VERSION_STRING);

		if(Shared.BBMAP_CLASS!=null){
			sb.append("\tCL:java");
			final List<String> jvmArgs=Shared.JVM_ARGS();
			if(jvmArgs!=null){
				for(String s : jvmArgs){
					sb.append(' ');
					sb.append(s);
				}
			}
			sb.append(" align2."+Shared.BBMAP_CLASS);
			if(Shared.COMMAND_LINE!=null){
				for(String s : Shared.COMMAND_LINE){
					sb.append(' ');
					sb.append(s);
				}
			}
		}

		return sb;
	}

	/*--------------------------------------------------------------*/
	/*----------------           Constants          ----------------*/
	/*--------------------------------------------------------------*/

	/** Buffer size, in bytes, at which printHeader1B writes its ByteBuilder out to the stream. */
	private static final int FLUSH_THRESHOLD=32768;

	/** Enables diagnostic messages on stderr in printHeader1B. */
	private static final boolean verbose=false;

}