#!/usr/local/bin/bash

# Launcher for jgi.RepresentativeSet.
#
# Resolves the directory this script lives in (following symlinks), derives
# the Java classpath from it, works out the JVM heap flags with the helpers
# in calcmem.sh, and then runs the Java class with the caller's arguments.

# Prints the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified September 4, 2019

Description: Makes a representative set of taxa from all-to-all identity
comparison. Input should be in 3+ column TSV format (first 3 are required):
(query, ref, ANI, qsize, rsize, qbases, rbases)
...as produced by CompareSketch with format=3 and usetaxidname.
Additional columns are allowed and will be ignored.

Usage: representative.sh in=<input file> out=<output file>

Parameters:
overwrite=f     (ow) Set to false to force the program to abort rather than
                overwrite an existing file.
threshold=0     Ignore edges under threshold value. This also affects the
                choice of centroids; a high threshold gives more weight to
                higher-value edges.
minratio=0      Ignores edges with a ratio below this value.
invertratio=f   Invert the ratio when greater than 1.
printheader=t   Print a header line in the output.
printsize=t     Print the size of retained nodes.
printclusters=t Print the nodes subsumed by each retained node.
minsize=0       Ignore nodes under this size (in unique kmers).
maxsize=0       If positive, ignore nodes over this size (unique kmers).
minbases=0      Ignore nodes under this size (in total bases).
maxbases=0      If positive, ignore nodes over this size (total bases).

Taxonomy parameters:
level=          Taxonomic level, such as phylum. Filtering will operate on
                sequences within the same taxonomic level as specified ids.
                If not set, only matches to a node or its descendants will
                be considered.
ids=            Comma-delimited list of NCBI numeric IDs. Can also be a
                file with one taxID per line.
names=          Alternately, a list of names (such as 'Homo sapiens').
                Note that spaces need special handling.
include=f       'f' will discard filtered sequences, 't' will keep them.
tree=<file>     Specify a TaxTree file like tree.taxtree.gz.
                On Genepool, use 'auto'.

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will
                specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
                The max is typically around 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs. Requires Java 8u92+.
-da             Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
# Each symlink hop is resolved relative to the directory containing the link,
# which is why we cd before calling readlink. A failed cd would leave DIR
# pointing at an unrelated directory (and therefore a bogus classpath), so
# abort instead of continuing silently.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  cd "$(dirname "$DIR")" || {
    echo "Error: cannot enter directory of '$DIR' while resolving script location." >&2
    exit 1
  }
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || {
  echo "Error: cannot enter directory of '$DIR' while resolving script location." >&2
  exit 1
}
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="$DIR""current/"

# Default heap flags, used unless calcXmx replaces them.
# z2 is assigned here and in calcXmx but is not passed to java in this script.
z="-Xmx4g"
z2="-Xms4g"
# Compared against 1 in calcXmx; presumably parseXmx (calcmem.sh) sets it to 1
# when the caller supplied an explicit -Xmx -- confirm against calcmem.sh.
set=0

# No arguments, -h or --help: show the help text and stop.
if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
  usage
  exit
fi

# Determines the JVM heap flags (z, z2) using the helpers from calcmem.sh.
# Arguments: the script's full argument list, forwarded to parseXmx.
# Globals:   DIR, set (read); z, z2 (written); RAM (read -- presumably set by
#            freeRam; confirm against calcmem.sh).
calcXmx () {
  # Without calcmem.sh none of the helpers below exist and the script would go
  # on to build an invalid "-Xmxm" flag; fail early with a clear message.
  if [ ! -r "$DIR""/calcmem.sh" ]; then
    echo "Error: cannot read $DIR""/calcmem.sh; unable to configure Java memory." >&2
    exit 1
  fi
  source "$DIR""/calcmem.sh"
  setEnvironment
  parseXmx "$@"
  if [[ $set == 1 ]]; then
    return
  fi
  freeRam 4000m 84
  z="-Xmx${RAM}m"
  z2="-Xms${RAM}m"
}
calcXmx "$@"

# Builds the java command line, echoes it to stderr, and runs it.
# Arguments: the script's full argument list, appended after the class name.
# Globals:   EA, EOOM (not defined in this file; presumably set by calcmem.sh),
#            z, CP.
# NOTE(review): the arguments are flattened into one string and re-parsed by
# eval, so values containing spaces or shell metacharacters must be escaped to
# survive a second round of shell parsing. Left unchanged because callers may
# rely on that behavior (see the note on names= in the help text).
a_sample_mt() {
  local CMD="java $EA $EOOM $z -cp $CP jgi.RepresentativeSet $@"
  # Intentionally unquoted: this undergoes the same word splitting as the eval
  # below, so the logged line is exactly the string that gets evaluated.
  echo $CMD >&2
  eval $CMD
}

a_sample_mt "$@"