1 package stream;
2 
3 import java.util.ArrayList;
4 
5 import dna.AminoAcid;
6 import dna.ChromosomeArray;
7 import dna.Data;
8 import shared.KillSwitch;
9 import shared.Shared;
10 import shared.Tools;
11 
12 public class SequentialReadInputStream extends ReadInputStream {
13 
SequentialReadInputStream(long maxReads_, int readlen_, int minreadlen_, int overlap_, boolean alternateStrand_)14 	public SequentialReadInputStream(long maxReads_, int readlen_, int minreadlen_, int overlap_, boolean alternateStrand_){
15 
16 		maxReads=(maxReads_<0 ? Long.MAX_VALUE : maxReads_);
17 		readlen=readlen_;
18 		minReadlen=minreadlen_;
19 		POSITION_INCREMENT=readlen;
20 		overlap=overlap_;
21 		alternateStrand=alternateStrand_;
22 		assert(overlap<POSITION_INCREMENT);
23 
24 		maxPosition=Data.chromLengths[1];
25 		maxChrom=Data.numChroms;
26 
27 		restart();
28 	}
29 
30 	@Override
31 	public void start(){}
32 
33 	@Override
34 	public void restart(){
35 		position=0;
36 		chrom=1;
37 		generated=0;
38 		consumed=0;
39 		next=0;
40 		buffer=null;
41 	}
42 
43 	@Override
44 	public boolean paired() {
45 		return false;
46 	}
47 
48 	@Override
49 	public boolean close() {return false;}
50 
51 	@Override
52 	public boolean hasMore() {
53 		if(verbose){
54 			System.out.println("Called hasMore(): "+(id>=maxReads)+", "+(chrom<maxChrom)+", "+(position<=maxPosition)+", "+(buffer==null || next>=BUF_LEN));
55 			System.out.println(id+", "+maxReads+", "+chrom+", "+maxChrom+", "+position+", "+maxPosition+", "+buffer+", "+next+", "+(buffer==null ? -1 : BUF_LEN));
56 		}
57 //		if(buffer==null || next>=buffer.size()){
58 //			if(tf.isOpen()){
59 //				fillBuffer();
60 //			}else{
61 //				assert(generated>0) : "Was the file empty?";
62 //			}
63 //		}
64 //		return (buffer!=null && next<buffer.size());
65 		if(id>=maxReads){return false;}
66 		if(chrom<maxChrom){return true;}
67 		if(position<=maxPosition){return true;}
68 		if(buffer==null || next>=buffer.size()){return false;}
69 		return true;
70 	}
71 
72 	@Override
next()73 	public Read next() {
74 		if(!hasMore()){return null;}
75 		if(buffer==null || next>=buffer.size()){fillBuffer();}
76 		Read r=buffer.get(next);
77 		buffer.set(next, null);
78 		next++;
79 		consumed++;
80 		return r;
81 	}
82 
83 	@Override
nextList()84 	public synchronized ArrayList<Read> nextList() {
85 		if(next!=0){throw new RuntimeException("'next' should not be used when doing blockwise access.");}
86 		if(!hasMore()){return null;}
87 		if(buffer==null || next>=buffer.size()){fillBuffer();}
88 		ArrayList<Read> r=buffer;
89 		buffer=null;
90 		if(r!=null && r.size()==0){r=null;}
91 		consumed+=(r==null ? 0 : r.size());
92 		return r;
93 	}
94 
fillBuffer()95 	private synchronized void fillBuffer(){
96 //		System.out.println("fill "+chrom+", "+position);
97 		buffer=null;
98 		if(chrom>maxChrom){return;}
99 		ChromosomeArray cha=Data.getChromosome(chrom);
100 		next=0;
101 
102 		if(position==0){
103 			while(position<=maxPosition && !AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
104 		}
105 
106 		ArrayList<Read> reads=new ArrayList<Read>(BUF_LEN);
107 		int index=0;
108 
109 		while(position<=maxPosition && index<buffer.size() && id<maxReads){
110 			int start=position;
111 			int stop=Tools.min(position+readlen-1, cha.maxIndex);
112 			byte[] s=cha.getBytes(start, stop);
113 //			assert(s.length==readlen) : s.length+", "+readlen;
114 
115 			if(s.length<1 || !AminoAcid.isFullyDefined(s)){
116 				int firstGood=-1, lastGood=-1;
117 				for(int i=0; i<s.length; i++){
118 					if(AminoAcid.isFullyDefined(s[i])){
119 						lastGood=i;
120 						if(firstGood==-1){firstGood=i;}
121 					}
122 				}
123 				if(lastGood-firstGood+1>=minReadlen){
124 					start=start+firstGood;
125 					stop=stop-(s.length-lastGood-1);
126 					s=KillSwitch.copyOfRange(s, firstGood, lastGood+1);
127 					assert(s.length==lastGood-firstGood+1);
128 				}else{
129 					s=null;
130 				}
131 			}
132 
133 			if(s!=null){
134 				Read r=new Read(s, null, id, chrom, start, stop, Shared.PLUS);
135 				if(alternateStrand && (r.numericID&1)==1){r.reverseComplement();}
136 				r.setSynthetic(true);
137 //				System.out.println("Made read: "+r);
138 //				assert(id!=54406) : "\n"+r.toString()+"\nbases: "+s.length+"\nstart: "+start+"\nstop: "+stop+"\nminlen: "+minReadlen+"\n";
139 
140 				reads.add(r);
141 				index++;
142 				position+=(POSITION_INCREMENT-overlap);
143 				id++;
144 			}else{
145 				//Move to the next defined position
146 				while(AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
147 				while(position<=maxPosition && !AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
148 			}
149 		}
150 //		System.out.println("got "+index+" from "+chrom+", "+position);
151 
152 		if(index==0){
153 			if(UNLOAD && chrom>0){Data.unload(chrom, true);}
154 			chrom++;
155 			position=0;
156 			buffer=null;
157 			fillBuffer();
158 			return;
159 		}
160 
161 		generated+=index;
162 
163 		buffer=reads;
164 	}
165 
166 	private long id=0;
167 
168 	public int position=0;
169 	public int maxPosition;
170 
171 	private int chrom;
172 
173 	private ArrayList<Read> buffer=null;
174 	private int next=0;
175 
176 	private final int BUF_LEN=Shared.bufferLen();;
177 	public static boolean UNLOAD=false;
178 
179 	public long generated=0;
180 	public long consumed=0;
181 
182 	public final long maxReads;
183 	public final int readlen;
184 	public final int POSITION_INCREMENT;
185 	public final int minReadlen;
186 	public final int maxChrom;
187 	public final int overlap;
188 	public final boolean alternateStrand;
189 
190 	public static boolean verbose=false;
191 
192 }
193