#!/bin/bash
# Concatenate per-region VCFs into one bgzip-compressed VCF and index it
# with tabix.
#
# Arguments:
#   $1  region file: whitespace/newline-separated region names, listed in
#       the order in which they should appear in the output
#   $2  directory containing one [region].vcf.gz per region name
#
# Output:
#   $(basename "$2").vcf.gz in the current directory; `tabix -p vcf` is
#   then run on it.
#
# The first region's file is copied in full (header and records); only the
# non-header lines (those not starting with '#') of the remaining files
# are appended.
#
# Exits non-zero, without leaving a partial output file behind, if the
# region file is empty or unreadable, if any input file is missing, or if
# any stage of the pipeline fails.
#
# Requires: zcat, grep, bgzip, tabix.
#
if [ $# -ne 2 ]; then
  echo "Usage: $0 [region file] [directory]"
  echo
  echo "Generates \`basename directory\`.vcf.gz, which is the concatenation"
  echo "of files in the directory named [directory]/[region1].vcf.gz,"
  echo "[directory]/[region2].vcf.gz, etc. in the order in which they"
  echo "occur in the region file."
  echo
  echo "A tabix index is subsequently generated."
  exit 1
fi

# Let a failure in the producer side of `producer | bgzip` surface in the
# pipeline's exit status instead of being masked by bgzip succeeding.
set -o pipefail

# Print an error message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Write the merged, uncompressed VCF to stdout.
# Arguments: paths of the per-region .vcf.gz files, in output order.
# Returns:   0 on success, 1 if decompression or filtering failed.
emit_merged_vcf() {
  local first=$1
  local file
  local -a status
  shift

  # The first file is passed through whole, so its header is kept intact
  # however long it is and the file is only decompressed once.
  zcat "$first" || return 1

  for file in "$@"; do
    zcat "$file" | grep -v '^#'
    status=("${PIPESTATUS[@]}")
    (( status[0] == 0 )) || return 1
    # grep exits 1 when it selects no lines; a region without any records
    # is legitimate, so only statuses above 1 are treated as errors.
    (( status[1] <= 1 )) || return 1
  done
}

regionfile=$1
mergedir=$2
mergename=$(basename -- "$mergedir")
vcfgenotypes=$mergename.vcf.gz

[[ -r "$regionfile" ]] || die "cannot read region file: $regionfile"
[[ -d "$mergedir" ]] || die "not a directory: $mergedir"

# Build the ordered list of input files. Region names are split on
# whitespace, but are never glob-expanded and the region file is read
# only once. The `||` clause picks up a last line lacking a newline.
files=()
while read -r -a names || (( ${#names[@]} > 0 )); do
  for region in "${names[@]}"; do
    files+=("$mergedir/$region.vcf.gz")
  done
done < "$regionfile"

(( ${#files[@]} > 0 )) || die "no regions listed in $regionfile"

# Verify every input before writing anything, so that a missing region
# cannot silently yield an incomplete merged VCF.
for file in "${files[@]}"; do
  [[ -r "$file" ]] || die "missing or unreadable input file: $file"
done

if ! emit_merged_vcf "${files[@]}" | bgzip > "$vcfgenotypes"; then
  rm -f -- "$vcfgenotypes"
  die "failed to create $vcfgenotypes"
fi

tabix -p vcf "$vcfgenotypes" || die "failed to index $vcfgenotypes"