1#!/usr/local/bin/bash
2
#Print the help text for filtervcf.sh on stdout.
#The text is a quoted here-document, so it is emitted verbatim with no
#expansion; it is framed by one blank line before and one blank line after.
usage(){
cat <<'EOF'

Written by Brian Bushnell
Last modified April 30, 2019

Description:  Filters VCF files by position or other attributes.
Filtering by optional fields (such as allele frequency) require VCF files
generated by CallVariants.

Usage:  filtervcf.sh in=<file> out=<file>

I/O parameters:
in=<file>       Input VCF.
out=<file>      Output VCF.
ref=<file>      Reference fasta (optional).
overwrite=f     (ow) Set to false to force the program to abort rather than
                overwrite an existing file.
bgzip=f         Use bgzip for gzip compression.
splitalleles=f  Split multi-allelic lines into multiple lines.
splitsubs=f     Split multi-base substitutions into SNPs.
canonize=t      Trim variations down to a canonical representation.

Position-filtering parameters:
minpos=         Ignore variants not overlapping this range.
maxpos=         Ignore variants not overlapping this range.
contigs=        Comma-delimited list of contig names to include. These
                should have no spaces, or underscores instead of spaces.
invert=f        Invert position filters.

Type-filtering parameters:
sub=t           Keep substitutions.
del=t           Keep deletions.
ins=t           Keep insertions.

Variant-quality filtering parameters:
minreads=0              Ignore variants seen in fewer reads.
minqualitymax=0         Ignore variants with lower max base quality.
minedistmax=0           Ignore variants with lower max distance from read ends.
minmapqmax=0            Ignore variants with lower max mapq.
minidmax=0              Ignore variants with lower max read identity.
minpairingrate=0.0      Ignore variants with lower pairing rate.
minstrandratio=0.0      Ignore variants with lower plus/minus strand ratio.
minquality=0.0          Ignore variants with lower average base quality.
minedist=0.0            Ignore variants with lower average distance from ends.
minavgmapq=0.0          Ignore variants with lower average mapq.
minallelefraction=0.0   Ignore variants with lower allele fraction.  This
                        should be adjusted for high ploidies.
minid=0                 Ignore variants with lower average read identity.
minscore=0.0            Ignore variants with lower Phred-scaled score.
clearfilters            Reset all variant filters to zero.

There are additionally max filters for score, quality, mapq, allelefraction,
and identity.

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                specify 200 megs. The max is typically 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs.  Requires Java 8u92+.
-da             Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
68
#Resolve the directory that holds this script, following symlinks, so that
#symlinked shellscripts still set the classpath correctly.
#The walk runs inside a command substitution (a subshell), so the caller's
#working directory is never changed, and a failed cd aborts the script
#instead of silently producing the wrong directory.
DIR="$(
  #An inherited CDPATH would make cd print to stdout and corrupt the result.
  CDPATH=
  src="${BASH_SOURCE[0]}"
  while [ -h "$src" ]; do
    #Enter the link's own directory first so a relative target resolves.
    cd "$(dirname "$src")" > /dev/null || exit 1
    src="$(readlink "$(basename "$src")")"
  done
  cd "$(dirname "$src")" > /dev/null || exit 1
  printf '%s/' "$(pwd)"
)" || {
  echo "Unable to determine the directory of ${BASH_SOURCE[0]}" >&2
  exit 1
}

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
#Classpath: the "current/" directory next to this script.
CP="${DIR}current/"
82
#Default JVM heap flags (maximum and initial heap) and the "set" marker that
#calcXmx checks before it overwrites z and z2.
z="-Xmx4g"
z2="-Xms4g"
set=0

#With no first argument, or with -h / --help, print the help text and stop.
case "$1" in
	""|-h|--help)
		usage
		exit
		;;
esac
91
#Choose the JVM heap flags z (-Xmx) and z2 (-Xms).
#Sources calcmem.sh from the script directory, then calls its setEnvironment
#and parseXmx helpers with the script's arguments. If "set" equals 1 afterwards
#(presumably parseXmx sets it when the user passed an explicit -Xmx; confirm
#in calcmem.sh) the current z/z2 are kept. Otherwise freeRam is called and
#both flags are rebuilt from RAM (presumably assigned by freeRam, in
#megabytes; confirm in calcmem.sh).
#Arguments: the script's command-line arguments.
calcXmx () {
	source "${DIR}/calcmem.sh"
	setEnvironment
	parseXmx "$@"
	if [[ $set != 1 ]]; then
		freeRam 4000m 42
		z="-Xmx${RAM}m"
		z2="-Xms${RAM}m"
	fi
}
calcXmx "$@"
104
#Run var2.FilterVCF in a JVM with the computed heap flags and classpath.
#Globals read: EA, EOOM (optional JVM flags), z, z2 (heap flags), CP (classpath).
#Arguments:    the script's command-line arguments, passed through unchanged.
#Outputs:      echoes the command line to stderr before running it.
#Returns:      the exit status of java.
#The command is built as an array and executed directly instead of being
#flattened into a string and passed to eval, so arguments that contain
#spaces, quotes, globs or shell metacharacters reach Java exactly as given.
filtervcf() {
	local -a cmd
	#EA, EOOM, z and z2 are deliberately unquoted: each may be empty (and must
	#then vanish) or hold whitespace-separated JVM flags.
	cmd=(java $EA $EOOM $z $z2 -cp "$CP" var2.FilterVCF "$@")
	printf '%s\n' "${cmd[*]}" >&2
	"${cmd[@]}"
}

filtervcf "$@"
112