#!/usr/local/bin/bash
2
# Prints the help text for mergeribo.sh to stdout.
# Arguments: none.
# Outputs:   the help text, preceded and followed by one blank line.
usage() {
# Quoted delimiter: the text is emitted literally, with no expansion.
cat <<'EOF'

Written by Brian Bushnell
Last modified January 28, 2020

Description:  Merges files of SSU sequences to keep one per taxID.
By default, a consensus is generated per TaxID, then the sequence
best matching that consensus is used:
First, all sequences per TaxID are aligned to a reference consensus.
Second, the best-matching sequence is used as a seed, and all other
sequences for that TaxID are aligned to the seed to generate a new consensus.
Third, in 'consensus' mode, that consensus is simply output.
In 'best' mode (default), all sequences are aligned again to the new consensus,
and the best-matching is output.

Usage:  mergeribo.sh in=<file,file> out=<file> 16S

Standard parameters:
in=<file,file>  Comma-delimited list of files.
out=<file>      Output file.
out2=<file>     Read 2 output if reads are in two files.
overwrite=f     (ow) Set to false to force the program to abort rather than
                overwrite an existing file.
showspeed=t     (ss) Set to 'f' to suppress display of processing speed.
ziplevel=2      (zl) Set to 1 (lowest) through 9 (max) to change compression
                level; lower compression is faster.

Processing parameters:
alt=<file>      Lower priority data.  Only used if there is no SSU associated
                with the TaxID from the primary input.
best=t          Output the best representative per taxID.
consensus=f     Output a consensus per taxID instead of the best input
                sequence.  Mutually exclusive with best.
fast=f          Output the best sequence based on alignment to global consensus
                (the seed) rather than individual consensus.
minid=0.62      Ignore sequences with identity lower than this to the global
                consensus.
maxns=-1        Ignore sequences with more than this many Ns, if non-negative.
minlen=1        Ignore sequences shorter than this.
maxlen=4000     Ignore sequences longer than this.
16S=t           Align to 16S consensus to pick the seed. Mutually exclusive.
18S=f           Align to 18S consensus to pick the seed. Mutually exclusive.

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                specify 200 megs. The max is typically 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs.  Requires Java 8u92+.
-da             Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
57
# Locate the directory that really contains this script, so the classpath is
# correct even when the script is started through a symlink.
# Result: DIR holds that directory as an absolute path with a trailing slash.
# The caller's working directory is saved with pushd and restored with popd.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # Move next to the link first so a relative link target resolves correctly.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
# A failed cd would leave DIR pointing at the wrong place, so stop instead.
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null
68
# Simpler alternative that does not follow symlinks, kept for reference:
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

# Java classpath: the current/ directory under DIR.
CP="${DIR}current/"

# Default JVM heap flags; calcXmx may replace them.
z="-Xmx4g"
z2="-Xms4g"
# calcXmx leaves z/z2 alone when this is 1 after parseXmx has run.
# NOTE(review): presumably parseXmx sets it when the user passes -Xmx - confirm in calcmem.sh.
set=0
75
# Print the help text and stop when there is no first argument or when it is
# -h / --help. The exit status is that of usage.
case "${1-}" in
	''|-h|--help)
		usage
		exit
		;;
esac
80
# Chooses the JVM heap flags.
# Globals:
#   DIR   (read)    - directory holding calcmem.sh
#   set   (read)    - checked after parseXmx; 1 means z/z2 are left as they are
#   RAM   (read)    - value in megabytes expected from freeRam
#   z, z2 (written) - -Xmx / -Xms flags built from RAM
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# Returns:   1 if calcmem.sh cannot be loaded (z/z2 keep their current
#            values); otherwise the status of the last command run.
calcXmx () {
	# calcmem.sh provides setEnvironment, parseXmx and freeRam.
	if ! source "${DIR}/calcmem.sh"; then
		echo "Warning: could not load ${DIR}/calcmem.sh; keeping $z" >&2
		return 1
	fi
	setEnvironment
	parseXmx "$@"
	if [[ "$set" == 1 ]]; then
		return
	fi
	freeRam 4000m 42
	# Without a RAM value the flags would become the malformed "-Xmxm".
	if [[ -z "$RAM" ]]; then
		return
	fi
	z="-Xmx${RAM}m"
	z2="-Xms${RAM}m"
}
# Settle the JVM heap flags before the launcher function is invoked.
calcXmx "$@"
93
# Launches prok.MergeRibo in a JVM.
# Globals (read):
#   EA, EOOM, z - JVM options; each may be empty
#   CP          - classpath passed to -cp
#   NOTE(review): EA and EOOM are not assigned in this file; presumably they
#   come from calcmem.sh - confirm.
# Arguments: forwarded to prok.MergeRibo, one word per argument.
# Outputs:   the command line is written to stderr before it runs.
# Returns:   the exit status of java.
mergeribo() {
	# Default IFS: the option variables split on whitespace and the log line
	# below is joined with single spaces.
	local IFS=$' \t\n'
	# EA/EOOM/z are deliberately unquoted so an empty one adds no word.
	# shellcheck disable=SC2206
	local -a cmd=(java $EA $EOOM $z -cp "$CP" prok.MergeRibo "$@")
	printf '%s\n' "${cmd[*]}" >&2
	# Execute the words directly rather than through eval, so arguments that
	# contain spaces or shell metacharacters reach Java unchanged.
	"${cmd[@]}"
}
99
# Entry point: pass every command-line argument on to the launcher.
mergeribo "$@"
101