#!/usr/local/bin/bash

# removemicrobes.sh
# Wrapper that launches align2.BBMap against a hard-coded "commonMicrobes"
# reference directory so that reads mapping to common microbial contaminants
# can be separated from clean reads. All command-line arguments are forwarded
# to BBMap after the fixed filtering parameters.

# Prints the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified September 17, 2018
This script requires at least 10GB RAM.
It is designed for NERSC and uses hard-coded paths.

Description:  Removes all reads that map to selected common microbial contaminant genomes.
Removes approximately 98.5% of common contaminant reads, with zero false-positives to non-bacteria.
NOTE!  This program uses hard-coded paths and will only run on Nersc systems.

Usage:  removemicrobes.sh in=<input file> outu=<clean output file>

Input may be fasta or fastq, compressed or uncompressed.

Parameters:
in=<file>           Input reads.  Should already be adapter-trimmed.
outu=<file>         Destination for clean reads.
outm=<file>         Optional destination for contaminant reads.
threads=auto        (t) Set number of threads to use; default is number of logical processors.
overwrite=t         (ow) Set to false to force the program to abort rather than overwrite an existing file.
interleaved=auto    (int) If true, forces fastq input to be paired and interleaved.
trim=t              Trim read ends to remove bases with quality below minq.
                    Values: t (trim both ends), f (neither end), r (right end only), l (left end only).
untrim=t            Undo the trimming after mapping.
minq=4              Trim quality threshold.
ziplevel=6          (zl) Set to 1 (lowest) through 9 (max) to change compression level; lower compression is faster.

build=1             Choses which masking mode was used:
                    1 is most stringent and should be used for bacteria.
                    2 uses fewer bacteria for masking (only RefSeq references).
                    3 is only masked for plastids and entropy, for use on anything except bacteria.
                    4 is unmasked.

***** All BBMap parameters can be used; run bbmap.sh for more details. *****

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks starting at this script's path until a
# real file is reached, then records that file's directory (with a trailing
# slash) in DIR. pushd/popd restore the caller's working directory.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # A failed cd would make the following readlink resolve relative to the
  # wrong directory and silently yield a bogus DIR, so abort instead.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Java classpath: the "current/" directory next to the resolved script.
CP="$DIR""current/"
# The JNI library path is computed and then immediately cleared, so no
# -Djava.library.path option is actually passed to java.
JNI="-Djava.library.path=""$DIR""jni/"
JNI=""

# Default JVM heap options (maximum and initial heap size).
z="-Xmx6000m"
z2="-Xms6000m"
# Not read anywhere in this script; presumably consumed by calcmem.sh.
# TODO confirm against calcmem.sh before removing.
set=0

# With no arguments, or with -h / --help as the first argument, show the
# help text and stop.
if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
  usage
  exit
fi

# Sources calcmem.sh from the script directory and calls its setEnvironment
# and parseXmx helpers with the script's arguments. Those helpers are defined
# outside this file; presumably they set EA/EOOM and may override z/z2 from
# -Xmx style arguments - verify against calcmem.sh.
calcXmx () {
  source "$DIR""/calcmem.sh"
  setEnvironment
  parseXmx "$@"
}
calcXmx "$@"

# Builds the BBMap command line, echoes it to stderr, and runs it.
# Globals:   EA, EOOM, z, z2, JNI, CP (read)
# Arguments: all script arguments, appended verbatim after the fixed options
# Outputs:   the full command line on stderr
# Returns:   the exit status of java
function removemicrobes() {
  # The command is kept as an array and executed directly rather than being
  # flattened into a string and passed to eval: eval re-parses user
  # arguments, which splits paths containing spaces and executes any shell
  # metacharacters they contain.
  # The JVM option variables are expanded unquoted on purpose so that empty
  # or unset ones contribute no argument and multi-word ones are split,
  # matching how the previous string-built command treated them.
  # shellcheck disable=SC2206
  local -a cmd=(
    java $EA $EOOM $z $z2 $JNI -cp "$CP" align2.BBMap
    strictmaxindel=4 bwr=0.16 bw=12 ef=0.001 minhits=2
    path=/global/projectb/sandbox/gaag/bbtools/commonMicrobes
    pigz unpigz zl=6 qtrim=r trimq=10 untrim idtag printunmappedcount
    ztd=2 kfilter=25 maxsites=1 k=13 minid=0.95 idfilter=0.95 minhits=2
    build=1 bloomfilter
    "$@"
  )
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}

removemicrobes "$@"