package stream;

import java.util.ArrayList;

import dna.AminoAcid;
import dna.ChromosomeArray;
import dna.Data;
import shared.KillSwitch;
import shared.Shared;
import shared.Tools;

/**
 * A ReadInputStream that manufactures reads by walking sequentially along the
 * chromosomes obtained from {@link Data#getChromosome}, starting at chromosome 1.
 * Each read covers up to {@code readlen} bases; consecutive reads start
 * {@code POSITION_INCREMENT-overlap} bases apart.  Windows containing
 * not-fully-defined bases are trimmed to the span between the first and last
 * defined base, and dropped if that span is shorter than {@code minReadlen}.
 * Every produced read is flagged as synthetic.
 */
public class SequentialReadInputStream extends ReadInputStream {

	/**
	 * @param maxReads_ Maximum number of reads to generate; a negative value means Long.MAX_VALUE.
	 * @param readlen_ Length of the window fetched for each read; also used as POSITION_INCREMENT.
	 * @param minreadlen_ Minimum length of a trimmed read for it to be kept.
	 * @param overlap_ Number of bases subtracted from POSITION_INCREMENT when advancing;
	 * asserted to be less than POSITION_INCREMENT.
	 * @param alternateStrand_ If true, reads whose numericID is odd are reverse-complemented.
	 */
	public SequentialReadInputStream(long maxReads_, int readlen_, int minreadlen_, int overlap_, boolean alternateStrand_){

		maxReads=(maxReads_<0 ? Long.MAX_VALUE : maxReads_);
		readlen=readlen_;
		minReadlen=minreadlen_;
		POSITION_INCREMENT=readlen;
		overlap=overlap_;
		alternateStrand=alternateStrand_;
		assert(overlap<POSITION_INCREMENT);

		//NOTE(review): maxPosition is taken from chromosome 1 only and is never
		//refreshed when fillBuffer() advances to another chromosome - confirm whether
		//it should be reset per chromosome.
		maxPosition=Data.chromLengths[1];
		maxChrom=Data.numChroms;

		restart();
	}

	/** No-op; this stream has nothing to start. */
	@Override
	public void start(){}

	/**
	 * Resets the cursor to position 0 of chromosome 1, zeroes the generated and
	 * consumed counters, and discards the current buffer.  The read id counter is
	 * not reset.
	 */
	@Override
	public void restart(){
		position=0;
		chrom=1;
		generated=0;
		consumed=0;
		next=0;
		buffer=null;
	}

	/** @return false; this stream only produces unpaired reads. */
	@Override
	public boolean paired() {
		return false;
	}

	/** Nothing to close. @return false always. */
	@Override
	public boolean close() {return false;}

	/**
	 * Reports whether more reads may be available.  This returns true whenever the
	 * cursor has not passed the last chromosome/position, even if the remaining
	 * sequence yields no reads, so callers must still handle a null from
	 * {@link #next()} or {@link #nextList()}.
	 * @return false once maxReads ids have been issued, or when the cursor is past
	 * the end and the buffer is exhausted; true otherwise.
	 */
	@Override
	public boolean hasMore() {
		if(verbose){
			System.out.println("Called hasMore(): "+(id>=maxReads)+", "+(chrom<maxChrom)+", "+(position<=maxPosition)+", "+(buffer==null || next>=BUF_LEN));
			System.out.println(id+", "+maxReads+", "+chrom+", "+maxChrom+", "+position+", "+maxPosition+", "+buffer+", "+next+", "+(buffer==null ? -1 : BUF_LEN));
		}
		if(id>=maxReads){return false;}
		if(chrom<maxChrom){return true;}
		if(position<=maxPosition){return true;}
		if(buffer==null || next>=buffer.size()){return false;}
		return true;
	}

	/**
	 * Returns the next single read, refilling the buffer when it is exhausted.
	 * @return The next read, or null if no more reads can be produced.
	 */
	@Override
	public Read next() {
		if(!hasMore()){return null;}
		if(buffer==null || next>=buffer.size()){fillBuffer();}
		//fillBuffer() leaves buffer null when the remaining sequence produced no reads.
		if(buffer==null || next>=buffer.size()){return null;}
		Read r=buffer.get(next);
		buffer.set(next, null);
		next++;
		consumed++;
		return r;
	}

	/**
	 * Returns the whole current buffer as a list and hands ownership to the caller.
	 * @return A non-empty list of reads, or null if no more reads can be produced.
	 * @throws RuntimeException if {@link #next()} has partially consumed the current buffer.
	 */
	@Override
	public synchronized ArrayList<Read> nextList() {
		if(next!=0){throw new RuntimeException("'next' should not be used when doing blockwise access.");}
		if(!hasMore()){return null;}
		if(buffer==null || next>=buffer.size()){fillBuffer();}
		ArrayList<Read> r=buffer;
		buffer=null;
		if(r!=null && r.size()==0){r=null;}
		consumed+=(r==null ? 0 : r.size());
		return r;
	}

	/**
	 * Generates up to BUF_LEN reads from the current chromosome, starting at the
	 * current position, and stores them in buffer.  If the current chromosome yields
	 * no reads, advances to the next chromosome (optionally unloading the finished
	 * one when UNLOAD is set) and tries again.  Leaves buffer null once chrom
	 * exceeds maxChrom.
	 */
	private synchronized void fillBuffer(){
		buffer=null;
		if(chrom>maxChrom){return;}
		ChromosomeArray cha=Data.getChromosome(chrom);
		next=0;

		if(position==0){
			//Skip leading undefined bases at the start of the chromosome
			while(position<=maxPosition && !AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
		}

		ArrayList<Read> reads=new ArrayList<Read>(BUF_LEN);
		int index=0;

		//Bound by BUF_LEN; buffer is null at this point so it cannot be consulted.
		while(position<=maxPosition && index<BUF_LEN && id<maxReads){
			int start=position;
			int stop=Tools.min(position+readlen-1, cha.maxIndex);
			byte[] s=cha.getBytes(start, stop);

			if(s.length<1 || !AminoAcid.isFullyDefined(s)){
				//Trim to the span between the first and last fully defined base
				int firstGood=-1, lastGood=-1;
				for(int i=0; i<s.length; i++){
					if(AminoAcid.isFullyDefined(s[i])){
						lastGood=i;
						if(firstGood==-1){firstGood=i;}
					}
				}
				//firstGood<0 means no defined base was found; without this check the
				//length expression evaluates to 1 and could pass when minReadlen<=1.
				if(firstGood>=0 && lastGood-firstGood+1>=minReadlen){
					start=start+firstGood;
					stop=stop-(s.length-lastGood-1);
					s=KillSwitch.copyOfRange(s, firstGood, lastGood+1);
					assert(s.length==lastGood-firstGood+1);
				}else{
					s=null;
				}
			}

			if(s!=null){
				Read r=new Read(s, null, id, chrom, start, stop, Shared.PLUS);
				if(alternateStrand && (r.numericID&1)==1){r.reverseComplement();}
				r.setSynthetic(true);

				reads.add(r);
				index++;
				position+=(POSITION_INCREMENT-overlap);
				id++;
			}else{
				//Move to the next defined position: skip the rest of the current
				//defined run, then the undefined run that follows it.
				while(position<=maxPosition && AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
				while(position<=maxPosition && !AminoAcid.isFullyDefined((char)cha.get(position))){position++;}
			}
		}

		if(index==0){
			//Nothing produced from this chromosome; move on to the next one.
			if(UNLOAD && chrom>0){Data.unload(chrom, true);}
			chrom++;
			position=0;
			buffer=null;
			fillBuffer();
			return;
		}

		generated+=index;

		buffer=reads;
	}

	/** Numeric id passed to the next generated read; also counts reads issued toward maxReads. */
	private long id=0;

	/** Current start position within the current chromosome. */
	public int position=0;
	/** Upper bound (inclusive) for position; set from Data.chromLengths[1] in the constructor. */
	public int maxPosition;

	/** Current chromosome number; starts at 1. */
	private int chrom;

	/** Reads generated by the last fill that have not yet been handed out; null when empty. */
	private ArrayList<Read> buffer=null;
	/** Index of the next read in buffer to return from next(). */
	private int next=0;

	/** Maximum number of reads generated per fill. */
	private final int BUF_LEN=Shared.bufferLen();
	/** If true, a chromosome is unloaded via Data.unload once it yields no more reads. */
	public static boolean UNLOAD=false;

	/** Number of reads generated since the last restart. */
	public long generated=0;
	/** Number of reads returned to callers since the last restart. */
	public long consumed=0;

	public final long maxReads;
	public final int readlen;
	public final int POSITION_INCREMENT;
	public final int minReadlen;
	public final int maxChrom;
	public final int overlap;
	public final boolean alternateStrand;

	/** If true, hasMore() prints diagnostic state to stdout. */
	public static boolean verbose=false;

}