#!/bin/bash
#
# Interactively remove individuals with too much missing data from a VCF.
#
# Usage: filter_missing_ind.sh <input.vcf> <output_prefix>
#
# Steps:
#   1. Run `vcftools --missing` so that <output_prefix>.imiss is produced.
#   2. Extract column 5 of that file (the value plotted as "% of missing
#      data") into ./totalmissing and draw an ASCII histogram with gnuplot.
#   3. Suggest a cutoff taken from the sorted column-5 values and let the
#      user either accept it or type a different one.
#   4. Write every individual whose column 5 exceeds the chosen cutoff to
#      ./lowDP.indv and re-run vcftools with --remove/--recode.
#
# Files written: ./totalmissing, ./lowDP.indv and whatever vcftools writes
# under <output_prefix>.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if (( $# < 2 )); then
  die "Usage: ${0##*/} <input.vcf> <output_prefix>"
fi

vcf=$1
out_prefix=$2
imiss="${out_prefix}.imiss"

# Accepts plain decimals and scientific notation (e.g. 0.5, .5, 1e-3).
readonly NUMBER_RE='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'

for tool in vcftools mawk perl; do
  command -v "$tool" > /dev/null || die "Required tool not found in PATH: $tool"
done

vcftools --vcf "$vcf" --missing --out "$out_prefix" \
  || die "vcftools --missing failed for $vcf"
[[ -s "$imiss" ]] || die "Expected vcftools output not found or empty: $imiss"

# Skip the header by position (NR > 1) instead of by pattern: matching on
# /IN/ would also discard any individual whose ID happens to contain "IN".
# Fields are split on tabs so IDs containing spaces do not shift columns.
mawk -F'\t' 'NR > 1 { print $5 }' "$imiss" > totalmissing

# Values are sorted in descending order; the entry 14% of the way down the
# list is offered as the default cutoff.
suggested_cutoff=$(sort -rn totalmissing \
  | perl -e '$d=.14;@l=<>;print $l[int($d*$#l)]')
[[ -n "$suggested_cutoff" ]] \
  || die "No per-individual data rows found in $imiss; cannot compute a cutoff"

# The histogram is informational only, so a gnuplot failure is not fatal.
# The quoted delimiter keeps the shell from expanding gnuplot's $1.
gnuplot << 'EOF'
set terminal dumb size 120, 30
set autoscale
unset label
set title "Histogram of % missing data per individual"
set ylabel "Number of Occurrences"
set xlabel "% of missing data"
#set yr [0:100000]
binwidth=0.01
bin(x,width)=width*floor(x/width) + binwidth/2.0
plot 'totalmissing' using (bin($1,binwidth)):(1.0) smooth freq with boxes
pause -1
EOF

echo "The 85% cutoff would be" "$suggested_cutoff"
echo "Would you like to set a different cutoff, yes or no"

read -r answer

# Only the exact answer "yes" switches to a user-supplied cutoff; anything
# else keeps the suggested value.
cutoff=$suggested_cutoff
if [[ "$answer" == "yes" ]]; then
  echo "Please enter new cutoff"
  read -r cutoff
  # A non-numeric value would make awk fall back to string comparison and
  # could silently remove every individual.
  [[ "$cutoff" =~ $NUMBER_RE ]] || die "Cutoff must be a number, got: '$cutoff'"
fi

# NR > 1 keeps the header row out of the removal list (its non-numeric
# column 5 would otherwise compare as a string and always be selected).
mawk -F'\t' -v x="$cutoff" 'NR > 1 && $5 > x { print $1 }' "$imiss" > lowDP.indv

vcftools --vcf "$vcf" --remove lowDP.indv --recode --recode-INFO-all \
  --out "$out_prefix"