1 package stream;
2 
3 import java.io.IOException;
4 import java.io.OutputStream;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.List;
8 
9 import dna.Data;
10 import fileIO.TextStreamWriter;
11 import shared.Shared;
12 import shared.Tools;
13 import structures.ByteBuilder;
14 
15 /**
16  * @author Brian Bushnell
17  * @date Jul 7, 2014
18  *
19  */
20 public class SamHeader {
21 
header0B(ByteBuilder bb)22 	public static ByteBuilder header0B(ByteBuilder bb){
23 		//		if(MAKE_TOPHAT_TAGS){
24 		//			return new ByteBuilder("@HD\tVN:"+(VERSION<1.4f ? "1.0" : "1.4")+"\tSO:unsorted");
25 		//		}
26 		bb.append("@HD\tVN:");
27 		bb.append((SamLine.VERSION<1.4f ? "1.3" : "1.4"));
28 		bb.append("\tSO:unsorted");
29 		return bb;
30 	}
31 
header0()32 	public static StringBuilder header0(){
33 		//		if(MAKE_TOPHAT_TAGS){
34 		//			return new StringBuilder("@HD\tVN:"+(SamLine.VERSION<1.4f ? "1.0" : "1.4")+"\tSO:unsorted");
35 		//		}
36 		StringBuilder sb=new StringBuilder("@HD\tVN:"+(SamLine.VERSION<1.4f ? "1.3" : "1.4")+"\tSO:unsorted");
37 		return sb;
38 	}
39 
scaffolds(int minChrom, int maxChrom, boolean sort)40 	static ArrayList<String> scaffolds(int minChrom, int maxChrom, boolean sort){
41 		final ArrayList<String> list=new ArrayList<String>(4000);
42 		final StringBuilder sb=new StringBuilder(1000);
43 		for(int i=minChrom; i<=maxChrom && i<=Data.numChroms; i++){
44 			final byte[][] inames=Data.scaffoldNames[i];
45 			for(int j=0; j<Data.chromScaffolds[i]; j++){
46 				final byte[] scn=inames[j];
47 				sb.append("@SQ\tSN:");//+Data.scaffoldNames[i][j]);
48 				if(scn==null){
49 					assert(false) : "scaffoldName["+i+"]["+j+"] = null";
50 					sb.append("null");
51 				}else{
52 					appendScafName(sb, scn);
53 				}
54 				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j])));
55 				//				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j]+1000L)));
56 				//				sb.append("\tAS:"+((Data.name==null ? "" : Data.name+" ")+"b"+Data.GENOME_BUILD).replace('\t', ' '));
57 
58 				sb.append('\n');
59 				list.add(sb.toString());
60 				sb.setLength(0);
61 			}
62 		}
63 		if(sort){Shared.sort(list);}
64 		return list;
65 	}
66 
header1(int minChrom, int maxChrom)67 	public static StringBuilder header1(int minChrom, int maxChrom){
68 		StringBuilder sb=new StringBuilder(20000);
69 		if(SamLine.SORT_SCAFFOLDS){
70 			ArrayList<String> scaffolds=scaffolds(minChrom, maxChrom, true);
71 			for(int i=0; i<scaffolds.size(); i++){
72 				sb.append(scaffolds.get(i));
73 				scaffolds.set(i, null);
74 			}
75 			return sb;
76 		}
77 
78 		for(int i=minChrom; i<=maxChrom && i<=Data.numChroms; i++){
79 			final byte[][] inames=Data.scaffoldNames[i];
80 			for(int j=0; j<Data.chromScaffolds[i]; j++){
81 				byte[] scn=inames[j];
82 				sb.append("@SQ\tSN:");//+Data.scaffoldNames[i][j]);
83 				if(scn==null){
84 					assert(false) : "scaffoldName["+i+"]["+j+"] = null";
85 					sb.append("null");
86 				}else{
87 					appendScafName(sb, scn);
88 				}
89 
90 				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j])));
91 				//				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j]+1000L)));
92 				//				sb.append("\tAS:"+((Data.name==null ? "" : Data.name+" ")+"build "+Data.GENOME_BUILD).replace('\t', ' '));
93 
94 				sb.append('\n');
95 			}
96 		}
97 
98 		return sb;
99 	}
100 
printHeader1(int minChrom, int maxChrom, PrintWriter pw)101 	public static void printHeader1(int minChrom, int maxChrom, PrintWriter pw){
102 		if(SamLine.SORT_SCAFFOLDS){
103 			ArrayList<String> scaffolds=scaffolds(minChrom, maxChrom, true);
104 			for(int i=0; i<scaffolds.size(); i++){
105 				pw.print(scaffolds.set(i, null));
106 			}
107 			return;
108 		}
109 
110 		for(int i=minChrom; i<=maxChrom && i<=Data.numChroms; i++){
111 			final byte[][] inames=Data.scaffoldNames[i];
112 			StringBuilder sb=new StringBuilder(256);
113 			for(int j=0; j<Data.chromScaffolds[i]; j++){
114 				final byte[] scn=inames[j];
115 				//				StringBuilder sb=new StringBuilder(7+(scn==null ? 4 : scn.length)+4+10+4+/*(Data.name==null ? 0 : Data.name.length()+1)+11*/+4);//last one could be 1
116 				sb.append("@SQ\tSN:");//+Data.scaffoldNames[i][j]);
117 				if(scn==null){
118 					assert(false) : "scaffoldName["+i+"]["+j+"] = null";
119 					sb.append("null");
120 				}else{
121 					appendScafName(sb, scn);
122 				}
123 				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j])));
124 				//				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j]+1000L)));
125 				//				sb.append("\tAS:"+((Data.name==null ? "" : Data.name+" ")+"b"+Data.GENOME_BUILD).replace('\t', ' '));
126 
127 				sb.append('\n');
128 
129 				pw.print(sb);
130 				sb.setLength(0);
131 			}
132 		}
133 	}
134 
printHeader1B(int minChrom, int maxChrom, ByteBuilder bb, OutputStream os)135 	public static void printHeader1B(int minChrom, int maxChrom, ByteBuilder bb, OutputStream os){
136 		if(verbose){System.err.println("printHeader1B("+minChrom+", "+maxChrom+")");}
137 
138 		if(SamLine.SORT_SCAFFOLDS){
139 			if(verbose){System.err.println("Sorting scaffolds");}
140 			ArrayList<String> scaffolds=scaffolds(minChrom, maxChrom, true);
141 			for(int i=0; i<scaffolds.size(); i++){
142 				String s=scaffolds.set(i, null);
143 				bb.append(s);
144 				if(bb.length>=32768){
145 					try {
146 						os.write(bb.array, 0, bb.length);
147 					} catch (IOException e) {
148 						throw new RuntimeException(e);
149 					}
150 					bb.setLength(0);
151 				}
152 			}
153 			return;
154 		}
155 
156 		if(verbose){System.err.println("Iterating over chroms");}
157 		for(int chrom=minChrom; chrom<=maxChrom && chrom<=Data.numChroms; chrom++){
158 			//			if(verbose){System.err.println("chrom "+chrom);}
159 			final byte[][] inames=Data.scaffoldNames[chrom];
160 			//			if(verbose){System.err.println("inames"+(inames==null ? " = null" : ".length = "+inames.length));}
161 			final int numScafs=Data.chromScaffolds[chrom];
162 			//			if(verbose){System.err.println("scaffolds: "+numScafs);}
163 			assert(inames.length==numScafs) : "Mismatch between number of scaffolds and names for chrom "+chrom+": "+inames.length+" != "+numScafs;
164 			for(int scaf=0; scaf<numScafs; scaf++){
165 				//				if(verbose){System.err.println("chromScaffolds["+scaf+"] = "+(inames==null ? "=null" : ".length="+inames.length));}
166 				final byte[] scafName=inames[scaf];
167 				//				if(verbose){System.err.println("scafName = "+(scafName==null ? "null" : new String(scafName)));}
168 				bb.append("@SQ\tSN:");//+Data.scaffoldNames[i][j]);
169 				if(scafName==null){
170 					assert(false) : "scaffoldName["+chrom+"]["+scaf+"] = null";
171 					bb.append(scafName);
172 				}else{
173 					appendScafName(bb, scafName);
174 				}
175 				bb.append("\tLN:");
176 				bb.append(Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[chrom][scaf])));
177 				//				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j]+1000L)));
178 				//				sb.append("\tAS:"+((Data.name==null ? "" : Data.name+" ")+"b"+Data.GENOME_BUILD).replace('\t', ' '));
179 
180 				bb.nl();
181 
182 				if(bb.length>=32768){
183 					try {
184 						os.write(bb.array, 0, bb.length);
185 					} catch (IOException e) {
186 						throw new RuntimeException(e);
187 					}
188 					bb.setLength(0);
189 				}
190 			}
191 		}
192 	}
193 
printHeader1(int minChrom, int maxChrom, TextStreamWriter tsw)194 	public static void printHeader1(int minChrom, int maxChrom, TextStreamWriter tsw){
195 		if(SamLine.SORT_SCAFFOLDS){
196 			ArrayList<String> scaffolds=scaffolds(minChrom, maxChrom, true);
197 			for(int i=0; i<scaffolds.size(); i++){
198 				tsw.print(scaffolds.set(i, null));
199 			}
200 			return;
201 		}
202 
203 		for(int i=minChrom; i<=maxChrom && i<=Data.numChroms; i++){
204 			final byte[][] inames=Data.scaffoldNames[i];
205 			final StringBuilder sb=new StringBuilder(256);
206 			for(int j=0; j<Data.chromScaffolds[i]; j++){
207 				final byte[] scn=inames[j];
208 				//				StringBuilder sb=new StringBuilder(7+(scn==null ? 4 : scn.length)+4+10+4+/*(Data.name==null ? 0 : Data.name.length()+1)+11*/+4);//last one could be 1
209 				sb.append("@SQ\tSN:");//+Data.scaffoldNames[i][j]);
210 				if(scn==null){
211 					assert(false) : "scaffoldName["+i+"]["+j+"] = null";
212 					sb.append("null");
213 				}else{
214 					appendScafName(sb, scn);
215 				}
216 				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j])));
217 				//				sb.append("\tLN:"+Tools.min(Integer.MAX_VALUE, (Data.scaffoldLengths[i][j]+1000L)));
218 				//				sb.append("\tAS:"+((Data.name==null ? "" : Data.name+" ")+"b"+Data.GENOME_BUILD).replace('\t', ' '));
219 
220 				sb.append('\n');
221 
222 				tsw.print(sb);
223 				sb.setLength(0);
224 			}
225 		}
226 	}
227 
appendScafName(StringBuilder sb, byte[] scn)228 	static void appendScafName(StringBuilder sb, byte[] scn){
229 		if(Data.scaffoldPrefixes){
230 			int k=0;
231 			while(k<scn.length && scn[k]!='$'){k++;}
232 			k++;
233 			while(k<scn.length){
234 				sb.append((char)scn[k]);
235 				k++;
236 			}
237 		}else{
238 			final char[] buffer=Shared.getTLCB(scn.length);
239 			for(int i=0; i<scn.length; i++){buffer[i]=(char)scn[i];}
240 			sb.append(buffer, 0, scn.length);
241 		}
242 	}
243 
appendScafName(ByteBuilder sb, byte[] scn)244 	static void appendScafName(ByteBuilder sb, byte[] scn){
245 		if(Data.scaffoldPrefixes){
246 			int k=0;
247 			while(k<scn.length && scn[k]!='$'){k++;}
248 			k++;
249 			while(k<scn.length){
250 				sb.append(scn[k]);
251 				k++;
252 			}
253 		}else{
254 			sb.append(scn);
255 		}
256 	}
257 
header2()258 	public static StringBuilder header2(){
259 		StringBuilder sb=new StringBuilder(1000);
260 		//		sb.append("@RG\tID:unknownRG\tSM:unknownSM\tPL:ILLUMINA\n"); //Can cause problems.  If RG is in the header, reads may need extra fields.
261 
262 		//		if(MAKE_TOPHAT_TAGS){
263 		////			sb.append("@PG\tID:TopHat\tVN:2.0.6\tCL:/usr/common/jgi/aligners/tophat/2.0.6/bin/tophat -p 16 -r 0 --max-multihits 1 Creinhardtii_236 reads_1.fa reads_2.fa");
264 		//			sb.append("@PG\tID:TopHat\tVN:2.0.6");
265 		//		}else{
266 		//			sb.append("@PG\tID:BBMap\tPN:BBMap\tVN:"+Shared.BBMAP_VERSION_STRING);
267 		//		}
268 
269 		if(SamLine.READGROUP_ID!=null){
270 			sb.append("@RG\tID:").append(SamLine.READGROUP_ID);
271 			if(SamLine.READGROUP_CN!=null){sb.append("\tCN:").append(SamLine.READGROUP_CN);}
272 			if(SamLine.READGROUP_DS!=null){sb.append("\tDS:").append(SamLine.READGROUP_DS);}
273 			if(SamLine.READGROUP_DT!=null){sb.append("\tDT:").append(SamLine.READGROUP_DT);}
274 			if(SamLine.READGROUP_FO!=null){sb.append("\tFO:").append(SamLine.READGROUP_FO);}
275 			if(SamLine.READGROUP_KS!=null){sb.append("\tKS:").append(SamLine.READGROUP_KS);}
276 			if(SamLine.READGROUP_LB!=null){sb.append("\tLB:").append(SamLine.READGROUP_LB);}
277 			if(SamLine.READGROUP_PG!=null){sb.append("\tPG:").append(SamLine.READGROUP_PG);}
278 			if(SamLine.READGROUP_PI!=null){sb.append("\tPI:").append(SamLine.READGROUP_PI);}
279 			if(SamLine.READGROUP_PL!=null){sb.append("\tPL:").append(SamLine.READGROUP_PL);}
280 			if(SamLine.READGROUP_PU!=null){sb.append("\tPU:").append(SamLine.READGROUP_PU);}
281 			if(SamLine.READGROUP_SM!=null){sb.append("\tSM:").append(SamLine.READGROUP_SM);}
282 			sb.append('\n');
283 		}
284 
285 		sb.append("@PG\tID:BBMap\tPN:BBMap\tVN:");
286 		sb.append(Shared.BBMAP_VERSION_STRING);
287 
288 		if(Shared.BBMAP_CLASS!=null){
289 			sb.append("\tCL:java");
290 			{
291 				List<String> list=null;
292 				list=Shared.JVM_ARGS();
293 				if(list!=null){
294 					for(String s : list){
295 						sb.append(' ');
296 						sb.append(s);
297 					}
298 				}
299 			}
300 			sb.append(" align2."+Shared.BBMAP_CLASS);
301 			if(Shared.COMMAND_LINE!=null){
302 				for(String s : Shared.COMMAND_LINE){
303 					sb.append(' ');
304 					sb.append(s);
305 				}
306 			}
307 		}
308 
309 		return sb;
310 	}
311 
header2B(ByteBuilder sb)312 	public static ByteBuilder header2B(ByteBuilder sb){
313 
314 		if(SamLine.READGROUP_ID!=null){
315 			sb.append("@RG\tID:").append(SamLine.READGROUP_ID);
316 			if(SamLine.READGROUP_CN!=null){sb.append("\tCN:").append(SamLine.READGROUP_CN);}
317 			if(SamLine.READGROUP_DS!=null){sb.append("\tDS:").append(SamLine.READGROUP_DS);}
318 			if(SamLine.READGROUP_DT!=null){sb.append("\tDT:").append(SamLine.READGROUP_DT);}
319 			if(SamLine.READGROUP_FO!=null){sb.append("\tFO:").append(SamLine.READGROUP_FO);}
320 			if(SamLine.READGROUP_KS!=null){sb.append("\tKS:").append(SamLine.READGROUP_KS);}
321 			if(SamLine.READGROUP_LB!=null){sb.append("\tLB:").append(SamLine.READGROUP_LB);}
322 			if(SamLine.READGROUP_PG!=null){sb.append("\tPG:").append(SamLine.READGROUP_PG);}
323 			if(SamLine.READGROUP_PI!=null){sb.append("\tPI:").append(SamLine.READGROUP_PI);}
324 			if(SamLine.READGROUP_PL!=null){sb.append("\tPL:").append(SamLine.READGROUP_PL);}
325 			if(SamLine.READGROUP_PU!=null){sb.append("\tPU:").append(SamLine.READGROUP_PU);}
326 			if(SamLine.READGROUP_SM!=null){sb.append("\tSM:").append(SamLine.READGROUP_SM);}
327 			sb.append('\n');
328 		}
329 
330 		sb.append("@PG\tID:BBMap\tPN:BBMap\tVN:");
331 		sb.append(Shared.BBMAP_VERSION_STRING);
332 
333 		if(Shared.BBMAP_CLASS!=null){
334 			sb.append("\tCL:java");
335 			{
336 				List<String> list=null;
337 				list=Shared.JVM_ARGS();
338 				if(list!=null){
339 					for(String s : list){
340 						sb.append(' ');
341 						sb.append(s);
342 					}
343 				}
344 			}
345 			sb.append(" align2."+Shared.BBMAP_CLASS);
346 			if(Shared.COMMAND_LINE!=null){
347 				for(String s : Shared.COMMAND_LINE){
348 					sb.append(' ');
349 					sb.append(s);
350 				}
351 			}
352 		}
353 
354 		return sb;
355 	}
356 
	/** When true, printHeader1B prints progress messages to System.err.  Compile-time constant, off by default. */
	private static final boolean verbose=false;
358 
359 }
360