#!/usr/local/bin/bash

# Launcher for var2.FilterVCF.
# Resolves the directory this script lives in (following symlinks), uses it to
# build the Java classpath, chooses JVM heap flags with helpers sourced from
# calcmem.sh, and then runs Java with all command-line arguments passed through.

# Prints the help text to stdout.
usage(){
echo "
Written by Brian Bushnell
Last modified April 30, 2019

Description: Filters VCF files by position or other attributes.
Filtering by optional fields (such as allele frequency) require VCF files
generated by CallVariants.

Usage: filtervcf.sh in=<file> out=<file>

I/O parameters:
in=<file>       Input VCF.
out=<file>      Output VCF.
ref=<file>      Reference fasta (optional).
overwrite=f     (ow) Set to false to force the program to abort rather than
                overwrite an existing file.
bgzip=f         Use bgzip for gzip compression.
splitalleles=f  Split multi-allelic lines into multiple lines.
splitsubs=f     Split multi-base substitutions into SNPs.
canonize=t      Trim variations down to a canonical representation.

Position-filtering parameters:
minpos=         Ignore variants not overlapping this range.
maxpos=         Ignore variants not overlapping this range.
contigs=        Comma-delimited list of contig names to include. These
                should have no spaces, or underscores instead of spaces.
invert=f        Invert position filters.

Type-filtering parameters:
sub=t           Keep substitutions.
del=t           Keep deletions.
ins=t           Keep insertions.

Variant-quality filtering parameters:
minreads=0              Ignore variants seen in fewer reads.
minqualitymax=0         Ignore variants with lower max base quality.
minedistmax=0           Ignore variants with lower max distance from read ends.
minmapqmax=0            Ignore variants with lower max mapq.
minidmax=0              Ignore variants with lower max read identity.
minpairingrate=0.0      Ignore variants with lower pairing rate.
minstrandratio=0.0      Ignore variants with lower plus/minus strand ratio.
minquality=0.0          Ignore variants with lower average base quality.
minedist=0.0            Ignore variants with lower average distance from ends.
minavgmapq=0.0          Ignore variants with lower average mapq.
minallelefraction=0.0   Ignore variants with lower allele fraction. This
                        should be adjusted for high ploidies.
minid=0                 Ignore variants with lower average read identity.
minscore=0.0            Ignore variants with lower Phred-scaled score.
clearfilters            Reset all variant filters to zero.

There are additionally max filters for score, quality, mapq, allelefraction,
and identity.

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                specify 200 megs. The max is typically 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs. Requires Java 8u92+.
-da             Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

#This block allows symlinked shellscripts to correctly set classpath.
#The caller's working directory is saved with pushd and restored with popd so
#that relative paths given on the command line keep their meaning.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
	# Follow one link per iteration; a relative link target is resolved
	# against the directory that contains the link, hence the cd first.
	cd "$(dirname "$DIR")" || exit 1
	DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null || exit 1

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="$DIR""current/"

# Default heap flags, used unless calcXmx replaces them.
z="-Xmx4g"
z2="-Xms4g"
# Compared against 1 in calcXmx; presumably set to 1 by parseXmx (calcmem.sh)
# when the user supplies an explicit -Xmx flag -- confirm against calcmem.sh.
set=0

# Show help when called with no arguments or with -h / --help.
if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
	usage
	exit
fi

# Chooses the JVM heap flags z (-Xmx) and z2 (-Xms).
# Globals:   DIR (read), set (read), RAM (read), z and z2 (written)
# Arguments: the script's full argument list
# setEnvironment, parseXmx and freeRam are defined in calcmem.sh, which is not
# visible here; RAM is presumably set by freeRam -- confirm against calcmem.sh.
calcXmx () {
	source "$DIR""/calcmem.sh"
	setEnvironment
	parseXmx "$@"
	if [[ $set == 1 ]]; then
		# Keep whatever z/z2 hold at this point instead of autodetecting.
		return
	fi
	freeRam 4000m 42
	z="-Xmx${RAM}m"
	z2="-Xms${RAM}m"
}
calcXmx "$@"

# Runs var2.FilterVCF under Java, forwarding every argument unchanged.
# Globals:   EA, EOOM (read; presumably set by calcmem.sh), z, z2, CP (read)
# Arguments: the script's full argument list
# Outputs:   echoes the command line to stderr before running it
# Returns:   the exit status of the java process
filtervcf() {
	# The command is built as an array and executed directly instead of being
	# flattened into a string and passed to eval. That way arguments and
	# install paths containing spaces, globs or shell metacharacters reach
	# Java exactly as the user typed them and are never re-parsed by the shell.
	#
	# EA, EOOM, z and z2 are deliberately left unquoted: each may be empty or
	# hold several whitespace-separated JVM flags, so word splitting is wanted.
	# shellcheck disable=SC2206
	local -a cmd=(java $EA $EOOM $z $z2 -cp "$CP" var2.FilterVCF "$@")
	printf '%s\n' "${cmd[*]}" >&2
	"${cmd[@]}"
}

filtervcf "$@"