1#!/usr/local/bin/bash
2
# Print the help text (description, usage line and parameter list) to stdout.
# Takes no arguments.
usage(){
# The quoted delimiter makes the here-document literal (no expansion).
# The blank first and last lines of the here-document are part of the output.
cat <<'EOF'

Written by Brian Bushnell
Last modified September 17, 2018
This script requires at least 10GB RAM.
It is designed for NERSC and uses hard-coded paths.

Description:  Removes all reads that map to selected common microbial contaminant genomes.
Removes approximately 98.5% of common contaminant reads, with zero false-positives to non-bacteria.
NOTE!  This program uses hard-coded paths and will only run on Nersc systems.

Usage:  removemicrobes.sh in=<input file> outu=<clean output file>

Input may be fasta or fastq, compressed or uncompressed.

Parameters:
in=<file>           Input reads.  Should already be adapter-trimmed.
outu=<file>         Destination for clean reads.
outm=<file>         Optional destination for contaminant reads.
threads=auto        (t) Set number of threads to use; default is number of logical processors.
overwrite=t         (ow) Set to false to force the program to abort rather than overwrite an existing file.
interleaved=auto    (int) If true, forces fastq input to be paired and interleaved.
trim=t              Trim read ends to remove bases with quality below minq.
                    Values: t (trim both ends), f (neither end), r (right end only), l (left end only).
untrim=t            Undo the trimming after mapping.
minq=4              Trim quality threshold.
ziplevel=6          (zl) Set to 1 (lowest) through 9 (max) to change compression level; lower compression is faster.

build=1             Chooses which masking mode was used:
                    1 is most stringent and should be used for bacteria.
                    2 uses fewer bacteria for masking (only RefSeq references).
                    3 is only masked for plastids and entropy, for use on anything except bacteria.
                    4 is unmasked.

***** All BBMap parameters can be used; run bbmap.sh for more details. *****

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
42
#This block allows symlinked shellscripts to correctly set classpath.
#It follows the chain of symlinks from the invoked path to the real script and
#stores that script's directory (with a trailing slash) in DIR.
#Any directory change that fails aborts the script, because continuing would
#leave DIR pointing at an unrelated directory and produce a wrong classpath.
pushd . > /dev/null || exit 1
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  #Move next to the link first so a relative link target resolves correctly.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
#Return to the directory the script was started from.
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
#Java classpath: the current/ directory next to the script.
CP="$DIR""current/"
#The JNI library path is disabled: the value used to be assigned and then
#immediately cleared. The assignment is kept here for reference.
#JNI="-Djava.library.path=""$DIR""jni/"
JNI=""

#Default JVM maximum and initial heap flags.
#NOTE(review): presumably replaced by parseXmx from calcmem.sh - confirm there.
z="-Xmx6000m"
z2="-Xms6000m"
#NOTE(review): not read anywhere in this file; presumably a flag consumed by
#the helpers in calcmem.sh - confirm there.
set=0
62
# Print the help text and stop when the first argument is missing/empty or is
# one of the help flags.
case "$1" in
	""|-h|--help)
		usage
		exit
		;;
esac
67
# Load the memory helper script that sits next to this script and run its
# setEnvironment and parseXmx functions.
# Globals:   DIR (read)
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# NOTE(review): setEnvironment and parseXmx are defined in calcmem.sh, which is
# not visible here; presumably they set the JVM flags used later - confirm.
calcXmx() {
	# shellcheck source=/dev/null
	. "${DIR}/calcmem.sh"
	setEnvironment
	parseXmx "$@"
}

calcXmx "$@"
74
# Run align2.BBMap against the hard-coded common-microbes reference with this
# script's fixed filter settings, followed by the caller's arguments.
# Globals:   EA, EOOM, z, z2, JNI, CP (read). EA and EOOM are not set in this
#            file; presumably they come from calcmem.sh - confirm there.
# Arguments: extra parameters, appended after the built-in settings. Each one
#            reaches java as exactly one argument, even if it contains spaces
#            or shell metacharacters.
# Outputs:   the command line, space-joined, on stderr before it runs.
# Returns:   the exit status of the java command.
function removemicrobes() {
	local -a CMD
	# EA, EOOM, z, z2 and JNI are deliberately unquoted: each may be empty (and
	# must then vanish) or hold several space-separated JVM flags.
	# shellcheck disable=SC2206
	CMD=(java $EA $EOOM $z $z2 $JNI -cp "$CP" align2.BBMap strictmaxindel=4 bwr=0.16 bw=12 ef=0.001 minhits=2 path=/global/projectb/sandbox/gaag/bbtools/commonMicrobes pigz unpigz zl=6 qtrim=r trimq=10 untrim idtag printunmappedcount ztd=2 kfilter=25 maxsites=1 k=13 minid=0.95 idfilter=0.95 minhits=2 build=1 bloomfilter "$@")
	printf '%s\n' "${CMD[*]}" >&2
	# Execute the array directly instead of eval on a flat string, so the
	# arguments are not split or parsed by the shell a second time.
	"${CMD[@]}"
}
80
81removemicrobes "$@"
82