package jgi;

import java.io.PrintStream;
import java.util.Random;

import dna.AminoAcid;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import structures.ByteBuilder;

/**
 * Generates random sequences and writes them as '>'-headed records whose
 * sequence lines are wrapped at Shared.FASTA_WRAP symbols.
 * Nucleotide records are named "contig1", "contig2", ...; when
 * Shared.AMINO_IN is set, amino acid records named "gene1", "gene2", ...
 * are written instead.
 *
 * @author Brian Bushnell
 * @date Jan 3, 2013
 *
 */
public class RandomGenome {

	/**
	 * Program entry point: builds a RandomGenome from the command line,
	 * runs it, and closes the print stream.
	 * @param args Command-line arguments in key=value form.
	 */
	public static void main(String[] args){
		//Start a timer immediately upon code entrance.
		Timer t=new Timer();

		//Create an instance of this class
		RandomGenome x=new RandomGenome(args);

		//Run the object
		x.process(t);

		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}

	/**
	 * Parses the command line and prepares the output format and random source.
	 * Keys handled here: chroms; len/length/size; pad; gc; verbose;
	 * nohomopolymers/banhomopolymers/nopoly; includestop/stop; seed.
	 * Anything else is offered to the shared Parser, and rejected if the
	 * Parser does not recognize it either.
	 * @param args Command-line arguments in key=value form.
	 * @throws RuntimeException If Tools.testOutputFiles rejects the output path.
	 */
	public RandomGenome(String[] args){

		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}

		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();

		Parser parser=new Parser();
		for(int i=0; i<args.length; i++){
			final String arg=args[i];

			//Split on the first '=' only, so values that themselves contain '='
			//(such as file paths) are kept whole rather than truncated.
			final String[] split=arg.split("=", 2);
			final String a=split[0].toLowerCase();

			//An empty value ("key=") is treated as absent, like a bare "key".
			String b=(split.length>1 && !split[1].isEmpty()) ? split[1] : null;
			if(b!=null && b.equalsIgnoreCase("null")){b=null;}

			if(a.equals("chroms")){
				chroms=Parse.parseIntKMG(b);
			}else if(a.equals("len") || a.equals("length") || a.equals("size")){
				totalLength=Parse.parseKMG(b);
			}else if(a.equals("pad")){
				pad=Tools.max(0, Parse.parseIntKMG(b));//Negative padding is clamped to 0
			}else if(a.equals("gc")){
				gc=Float.parseFloat(b);
			}else if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ReadWrite.verbose=verbose;
			}else if(a.equals("nohomopolymers") || a.equals("banhomopolymers") || a.equals("nopoly")){
				noPoly=Parse.parseBoolean(b);
			}else if(a.equals("includestop") || a.equals("stop")){
				includeStop=Parse.parseBoolean(b);
			}else if(a.equals("seed")){
				seed=Long.parseLong(b);
			}else if(parser.parse(arg, a, b)){
				//do nothing
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}

		{//Process parser fields
			overwrite=parser.overwrite;
			append=parser.append;

			out=parser.out1;
		}

		wrap=Shared.FASTA_WRAP;
		assert(wrap>0) : "Wrap is too small.";
		assert(chroms>0) : "Chroms must be greater than 0.";
		assert(totalLength>=chroms) : "Length must be at least chroms.";
		//2L forces long arithmetic; 2*pad in int could wrap negative and let an oversized request through.
		assert(2L*pad+totalLength/chroms<Shared.MAX_ARRAY_LEN) : "Length per chrom must be at most "+Shared.MAX_ARRAY_LEN;

		//Integer division: any remainder of totalLength/chroms is not generated.
		chromLength=(int)(totalLength/chroms);

		if(out!=null && out.equalsIgnoreCase("null")){out=null;}

		if(!Tools.testOutputFiles(overwrite, append, false, out)){
			outstream.println((out==null)+", "+out);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
		}

		ffout=FileFormat.testOutput(out, FileFormat.FA, null, true, overwrite, append, false);

		randy=Shared.threadLocalRandom(seed);
	}

	/**
	 * Generates and writes the sequences, choosing amino acid or nucleotide
	 * mode according to Shared.AMINO_IN.
	 * @param t Timer handed through to the mode-specific method.
	 */
	void process(Timer t){
		if(Shared.AMINO_IN){
			processAmino(t);
		}else{
			processNucleotide(t);
		}
	}

	/**
	 * Writes {@code chroms} nucleotide records named "contig1".."contigN".
	 * Each record has {@code pad} 'N' symbols, then {@code chromLength} random
	 * bases, then {@code pad} more 'N' symbols, wrapped at {@code wrap} per line.
	 * When gc is exactly 0.5 the four bases are drawn uniformly; otherwise each
	 * position is first assigned to the C/G class with probability gc, then one
	 * of the two bases in that class is picked.
	 * With noPoly set, a base equal to the previously emitted symbol is redrawn.
	 * @param t Timer; not used by this method.
	 */
	void processNucleotide(Timer t){

		ByteStreamWriter bsw=new ByteStreamWriter(ffout);
		bsw.start();

		for(int chrom=1; chrom<=chroms; chrom++){
			bsw.print('>').print("contig").println(chrom);
			ByteBuilder bb=new ByteBuilder(wrap+1);
			byte prev='N';
			final int max=chromLength+2*pad;//Total symbols in this record, including both pads
			final int pad2=chromLength+pad;//Index at which the trailing pad begins
			if(gc==0.5f){
				for(int i=0; i<max; ){
					//Fill one output line; i advances across lines, j within the line.
					for(int j=0; j<wrap && i<max; i++, j++){
						byte b;
						if(i<pad || i>=pad2){b='N';}
						else{
							b=AminoAcid.numberToBase[randy.nextInt(4)];
							while(noPoly && b==prev){b=AminoAcid.numberToBase[randy.nextInt(4)];}
						}
						bb.append(b);
						prev=b;
					}
					bb.nl();
					bsw.print(bb);
					bb.clear();
				}
			}else{
				for(int i=0; i<max; ){
					for(int j=0; j<wrap && i<max; i++, j++){
						//Drawn for every position, including padding, so the random
						//stream (and thus seeded output) does not depend on pad placement.
						boolean at=randy.nextFloat()>=gc;
						char b;
						if(i<pad || i>=pad2){b='N';}
						else{
							if(at){
								b=randy.nextBoolean() ? 'A' : 'T';
							}else{
								b=randy.nextBoolean() ? 'C' : 'G';
							}
							//Redraw within the same A/T or C/G class until it differs from prev.
							while(noPoly && b==prev){
								if(at){
									b=randy.nextBoolean() ? 'A' : 'T';
								}else{
									b=randy.nextBoolean() ? 'C' : 'G';
								}
							}
						}
						bb.append(b);
						prev=(byte)b;
					}
					bb.nl();
					bsw.print(bb);
					bb.clear();
				}
			}
		}
		bsw.poison();
		bsw.waitForFinish();
	}

	/**
	 * Writes {@code chroms} amino acid records named "gene1".."geneN".
	 * Each record has {@code pad} 'X' symbols, then {@code chromLength} symbols
	 * drawn uniformly from AminoAcid.numberToAcid, then {@code pad} more 'X'
	 * symbols, wrapped at {@code wrap} per line.
	 * Unless includeStop is set, the last entry of numberToAcid is never drawn.
	 * With noPoly set, a symbol equal to the previously emitted one is redrawn.
	 * @param t Timer; not used by this method.
	 */
	void processAmino(Timer t){

		ByteStreamWriter bsw=new ByteStreamWriter(ffout);
		bsw.start();

		final byte[] acids=AminoAcid.numberToAcid;
		//Presumably the final array entry is the stop symbol - confirm against AminoAcid.
		final int limit=(includeStop ? acids.length : acids.length-1);
		for(int chrom=1; chrom<=chroms; chrom++){
			bsw.print('>').print("gene").println(chrom);
			ByteBuilder bb=new ByteBuilder(wrap+1);
			byte prev='X';
			final int max=chromLength+2*pad;//Total symbols in this record, including both pads
			final int pad2=chromLength+pad;//Index at which the trailing pad begins
			for(int i=0; i<max; ){
				//Fill one output line; i advances across lines, j within the line.
				for(int j=0; j<wrap && i<max; i++, j++){
					byte b;
					if(i<pad || i>=pad2){b='X';}
					else{
						b=acids[randy.nextInt(limit)];
						while(noPoly && b==prev){b=acids[randy.nextInt(limit)];}
					}
					bb.append(b);
					prev=b;
				}
				bb.nl();
				bsw.print(bb);
				bb.clear();
			}
		}
		bsw.poison();
		bsw.waitForFinish();
	}

	/*--------------------------------------------------------------*/

	/** Output path, taken from the parser's out1; null if absent or given as "null" */
	private String out=null;

	/** Number of records to write */
	int chroms=1;
	/** Requested total number of random symbols across all records */
	long totalLength=1000000;
	/** Probability that a nucleotide position is C or G */
	float gc=0.5f;
	/** Random symbols per record: totalLength/chroms, rounded down */
	final int chromLength;
	/** Symbols per output line, from Shared.FASTA_WRAP */
	final int wrap;
	/** Count of padding symbols ('N' or 'X') on each end of every record */
	int pad=0;
	/** If true, no random symbol may equal the symbol emitted just before it */
	boolean noPoly=false;
	/** If true, the last entry of AminoAcid.numberToAcid may be drawn in amino mode */
	boolean includeStop=false;
	/** Seed handed to Shared.threadLocalRandom */
	long seed=-1;

	/*--------------------------------------------------------------*/

	/** Source of all random draws */
	final Random randy;

	/** Not updated anywhere in this class */
	private long linesOut=0;
	/** Not updated anywhere in this class */
	private long bytesOut=0;

	/*--------------------------------------------------------------*/

	/** Output file format, created with FileFormat.FA as the default type */
	private final FileFormat ffout;

	/*--------------------------------------------------------------*/

	/** Destination for messages; replaced by the PreParser's stream in the constructor */
	private PrintStream outstream=System.err;
	/** Set by the "verbose" argument, which also sets ReadWrite.verbose */
	public static boolean verbose=false;
	/** Not modified anywhere in this class */
	public boolean errorState=false;
	/** Copied from the parser; passed to the output file checks */
	private boolean overwrite=false;
	/** Copied from the parser; passed to the output file checks */
	private boolean append=false;

}