#!/usr/local/bin/bash

# Launcher for the Java class prok.MergeRibo.
# Resolves the directory this script lives in (following symlinks), works out
# the Java heap flags with the helpers from calcmem.sh, and then runs Java
# with every command-line argument forwarded unchanged.

# Print the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified January 28, 2020

Description: Merges files of SSU sequences to keep one per taxID.
By default, a consensus is generated per TaxID, then the sequence
best matching that consensus is used:
First, all sequences per TaxID are aligned to a reference consensus.
Second, the best-matching sequence is used as a seed, and all other
sequences for that TaxID are aligned to the seed to generate a new consensus.
Third, in 'consensus' mode, that consensus is simply output.
In 'best' mode (default), all sequences are aligned again to the new consensus,
and the best-matching is output.

Usage: mergeribo.sh in=<file,file> out=<file> 16S

Standard parameters:
in=<file,file>  Comma-delimited list of files.
out=<file>      Output file.
out2=<file>     Read 2 output if reads are in two files.
overwrite=f     (ow) Set to false to force the program to abort rather than
                overwrite an existing file.
showspeed=t     (ss) Set to 'f' to suppress display of processing speed.
ziplevel=2      (zl) Set to 1 (lowest) through 9 (max) to change compression
                level; lower compression is faster.

Processing parameters:
alt=<file>      Lower priority data. Only used if there is no SSU associated
                with the TaxID from the primary input.
best=t          Output the best representative per taxID.
consensus=f     Output a consensus per taxID instead of the best input
                sequence. Mutually exclusive with best.
fast=f          Output the best sequence based on alignment to global consensus
                (the seed) rather than individual consensus.
minid=0.62      Ignore sequences with identity lower than this to the global
                consensus.
maxns=-1        Ignore sequences with more than this many Ns, if non-negative.
minlen=1        Ignore sequences shorter than this.
maxlen=4000     Ignore sequences longer than this.
16S=t           Align to 16S consensus to pick the seed. Mutually exclusive.
18S=f           Align to 18S consensus to pick the seed. Mutually exclusive.

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                specify 200 megs. The max is typically 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs. Requires Java 8u92+.
-da             Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
# pushd/popd bracket the cd calls so the caller's working directory is
# restored once DIR has been computed.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
# Follow the chain of symlinks; each hop is resolved relative to the
# directory that contains the link.
while [ -h "$DIR" ]; do
  cd "$(dirname "$DIR")"
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")"
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Classpath handed to java via -cp.
CP="$DIR""current/"

# Default heap flags; calcXmx below may replace them.
z="-Xmx4g"
z2="-Xms4g"
# Compared against 1 in calcXmx; not assigned anywhere else in this script
# (presumably parseXmx from calcmem.sh sets it when the user passes -Xmx --
# confirm against calcmem.sh).
set=0

# With no arguments, or an explicit help flag, print the help text and stop.
if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
  usage
  exit
fi

# Decide the Java heap flags.
# Arguments: the script's full command line, passed on to parseXmx.
# Globals:   reads DIR, set, RAM; writes z and z2.
# setEnvironment, parseXmx and freeRam come from the sourced calcmem.sh;
# their exact behavior is not visible from this file.
calcXmx () {
  source "$DIR""/calcmem.sh"
  setEnvironment
  parseXmx "$@"
  # set == 1: keep whatever z/z2 currently hold and skip autodetection.
  if [[ $set == 1 ]]; then
    return
  fi
  # NOTE(review): the meaning of "4000m 42" is defined by freeRam in
  # calcmem.sh; RAM is read right after the call, so it is presumably set there.
  freeRam 4000m 42
  z="-Xmx${RAM}m"
  z2="-Xms${RAM}m"
}
calcXmx "$@"

# Run prok.MergeRibo under Java, forwarding all arguments untouched.
# Arguments: the script's full command line.
# Globals:   reads EA, EOOM, z, CP.
# Outputs:   echoes the command line to stderr before running it.
# Returns:   the exit status of java.
mergeribo() {
  # Build the command as an array and execute it directly instead of
  # eval-ing a flattened string: arguments and install paths that contain
  # spaces, quotes or shell metacharacters now reach Java exactly as typed
  # and can no longer be re-interpreted as shell code.
  # EA, EOOM and z are left unquoted on purpose so that empty values vanish
  # rather than becoming empty arguments (EA and EOOM are not assigned in
  # this script -- presumably by calcmem.sh; confirm).
  # shellcheck disable=SC2206
  local -a cmd=(java $EA $EOOM $z -cp "$CP" prok.MergeRibo "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}

mergeribo "$@"