1 package structures;
2 
3 import java.util.Arrays;
4 
5 import dna.AminoAcid;
6 import shared.KillSwitch;
7 import shared.Tools;
8 import stream.Read;
9 
10 /**
11  * Tracks the number of homopolymers observed of given lengths.
12  * Only the longest homopolymer for a given base is counted per sequence.
13  * @author Brian Bushnell
14  * @date August 27, 2018
15  *
16  */
17 public class PolymerTracker {
18 
19 	/*--------------------------------------------------------------*/
20 	/*----------------        Initialization        ----------------*/
21 	/*--------------------------------------------------------------*/
22 
PolymerTracker()23 	public PolymerTracker(){
24 		reset();
25 	}
26 
reset()27 	public void reset(){
28 		Arrays.fill(maxACGTN, 0);
29 		for(int i=0; i<5; i++){
30 			countsACGTN[i]=new LongList();
31 		}
32 	}
33 
34 	/*--------------------------------------------------------------*/
35 	/*----------------      Public Add Methods      ----------------*/
36 	/*--------------------------------------------------------------*/
37 
addPair(Read r)38 	public void addPair(Read r){
39 		if(r==null){return;}
40 		add(r.bases);
41 		add(r.mate);
42 	}
43 
add(Read r)44 	public void add(Read r){
45 		if(r==null){return;}
46 		add(r.bases);
47 	}
48 
add(PolymerTracker pt)49 	public void add(PolymerTracker pt){
50 		for(int i=0; i<5; i++){
51 			LongList list=countsACGTN[i];
52 			LongList ptList=pt.countsACGTN[i];
53 			for(int len=0; len<ptList.size; len++){
54 				long count=ptList.get(len);
55 				list.increment(len, count);
56 			}
57 		}
58 	}
59 
add(byte[] bases)60 	public void add(byte[] bases){
61 		if(bases==null || bases.length<1){return;}
62 		if(PER_SEQUENCE){
63 			addPerSequence(bases);
64 		}else{
65 			addPerPolymer(bases);
66 		}
67 	}
68 
69 	/*--------------------------------------------------------------*/
70 	/*----------------           Methods            ----------------*/
71 	/*--------------------------------------------------------------*/
72 
accumulate()73 	public LongList[] accumulate(){
74 		if(cumulativeACGTN!=null){return cumulativeACGTN;}
75 		LongList[] sums=new LongList[5];
76 		for(int i=0; i<5; i++){//Make reverse-cumulative version
77 			LongList list=countsACGTN[i];
78 			LongList sumList=new LongList(list.size);
79 			for(int len=list.size-1; len>=0; len--){
80 				sumList.set(len, sumList.get(len+1)+list.get(len));
81 			}
82 			sums[i]=sumList;
83 		}
84 		cumulativeACGTN=sums;
85 		return sums;
86 	}
87 
88 	//Non-cumulative
toHistogram()89 	public String toHistogram(){
90 		StringBuilder sb=new StringBuilder();
91 		sb.append("#Length\tA\tC\tG\tT\tN\n");
92 
93 		final int maxIndex=longest();
94 		for(int len=0; len<maxIndex; len++){
95 			sb.append(len);
96 			for(int i=0; i<5; i++){
97 				long count=countsACGTN[i].get(len);
98 				sb.append('\t').append(count);
99 			}
100 			sb.append('\n');
101 		}
102 		return sb.toString();
103 	}
104 
105 	//Cumulative
toHistogramCumulative()106 	public String toHistogramCumulative(){
107 		LongList[] sums=accumulate();
108 
109 		StringBuilder sb=new StringBuilder();
110 		sb.append("#Length\tA\tC\tG\tT\tN\n");
111 
112 		final int maxIndex=longest();
113 		for(int len=0; len<maxIndex; len++){
114 			sb.append(len);
115 			for(int i=0; i<5; i++){
116 				long count=sums[i].get(len);
117 				sb.append('\t').append(count);
118 			}
119 			sb.append('\n');
120 		}
121 		return sb.toString();
122 	}
123 
calcRatio(byte base1, byte base2, int length)124 	public double calcRatio(byte base1, byte base2, int length){
125 		long count1=getCount(base1, length);
126 		long count2=getCount(base2, length);
127 		return count1/Tools.max(1.0, count2);
128 	}
129 
getCount(byte base, int length)130 	public long getCount(byte base, int length) {
131 		int x=AminoAcid.baseToNumberACGTN[base];
132 		return countsACGTN[x].get(length);
133 	}
134 
calcRatioCumulative(byte base1, byte base2, int length)135 	public double calcRatioCumulative(byte base1, byte base2, int length){
136 		long count1=getCountCumulative(base1, length);
137 		long count2=getCountCumulative(base2, length);
138 		return count1/Tools.max(1.0, count2);
139 	}
140 
getCountCumulative(byte base, int length)141 	public long getCountCumulative(byte base, int length) {
142 		int x=AminoAcid.baseToNumberACGTN[base];
143 		return accumulate()[x].get(length);
144 	}
145 
146 	/*--------------------------------------------------------------*/
147 	/*----------------        Inner Methods         ----------------*/
148 	/*--------------------------------------------------------------*/
149 
150 	/** Increment once per sequence,
151 	 * for the longest homopolymer of each base */
addPerSequence(byte[] bases)152 	private void addPerSequence(byte[] bases){
153 		Arrays.fill(maxACGTN, 0);
154 		byte prev=-1;
155 		int current=0;
156 		for(byte b : bases){
157 			if(b==prev){
158 				current++;
159 			}else{
160 				recordMax(prev, current);
161 				prev=b;
162 				current=1;
163 			}
164 		}
165 		recordMax(prev, current);
166 
167 		for(int i=0; i<maxACGTN.length; i++){
168 			countsACGTN[i].increment(maxACGTN[i], 1);
169 		}
170 	}
171 
172 	/** Increment once per homopolymer */
addPerPolymer(byte[] bases)173 	private void addPerPolymer(byte[] bases){
174 		byte prev=-1;
175 		int current=0;
176 		for(byte b : bases){
177 			if(b==prev){
178 				current++;
179 			}else{
180 				recordCounts(prev, current);
181 				prev=b;
182 				current=1;
183 			}
184 		}
185 		recordCounts(prev, current);
186 	}
187 
recordMax(byte base, int len)188 	private void recordMax(byte base, int len){
189 		if(base<0){return;}
190 		int x=AminoAcid.baseToNumberACGTN[base];
191 		if(x<0){return;}
192 		maxACGTN[x]=Tools.max(maxACGTN[x], len);
193 	}
194 
recordCounts(byte base, int len)195 	private void recordCounts(byte base, int len){
196 		if(base<0){return;}
197 		int x=AminoAcid.baseToNumberACGTN[base];
198 		if(x<0){return;}
199 		countsACGTN[x].increment(len, 1);
200 	}
201 
longest()202 	private int longest(){
203 		int max=0;
204 		for(LongList list : countsACGTN){
205 			max=Tools.max(list.size(), max);
206 		}
207 		return max;
208 	}
209 
210 	/*--------------------------------------------------------------*/
211 	/*----------------            Fields            ----------------*/
212 	/*--------------------------------------------------------------*/
213 
214 	private final int[] maxACGTN=KillSwitch.allocInt1D(5);
215 	final LongList[] countsACGTN=new LongList[5];
216 	private LongList[] cumulativeACGTN;
217 
218 	/*--------------------------------------------------------------*/
219 	/*----------------        Static Fields         ----------------*/
220 	/*--------------------------------------------------------------*/
221 
222 	public static boolean PER_SEQUENCE=true;
223 	public static boolean CUMULATIVE=true;
224 
225 }
226