#!/bin/bash
#
# Concatenates per-region VCF files into one genotype VCF and one sites-only
# VCF, each bgzip-compressed and tabix-indexed.
#
# Arguments:
#   $1  region file: whitespace-separated region names, in output order
#   $2  directory containing one [region].vcf.gz per region
#
# Outputs (written to the current working directory):
#   <basename of $2>.vcf.gz        header + all records, all columns
#   <basename of $2>.sites.vcf.gz  same stream cut to the first 8 columns
#
# Requires: zcat, bgzip, tabix, and pee (from moreutils).
#
# Note: `set -o pipefail` is intentionally not enabled. `grep -v` exits 1 for
# a region with no records, and zcat receives SIGPIPE once the header has been
# read; neither is an error here.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ $# -ne 2 ]; then
  {
    echo "Usage: $0 [region file] [directory]"
    echo
    echo "Generates \`basename directory\`.vcf.gz and \`basename directory\`.sites.vcf.gz"
    echo "which are the concatenation of files in the directory named [directory]/[region1].vcf.gz,"
    echo "[directory]/[region2].vcf.gz, etc. in the order in which they occur in the region file."
    echo
    echo "Tabix indexes are simultaneously generated."
  } >&2
  exit 1
fi

regionfile=$1
mergedir=$2
mergename=$(basename "$mergedir")
vcfgenotypes=$mergename.vcf.gz
vcfsites=$mergename.sites.vcf.gz

# Fail early instead of part-way through a long merge.
for tool in zcat bgzip tabix pee; do
  command -v "$tool" >/dev/null 2>&1 || die "$0: required tool not found: $tool"
done

[[ -r "$regionfile" ]] || die "$0: cannot read region file: $regionfile"
[[ -d "$mergedir" ]] || die "$0: not a directory: $mergedir"

# Collect region names split on any whitespace (spaces, tabs, newlines), but
# without glob expansion. The `||` clause picks up a final line that has no
# trailing newline.
regions=()
while read -r -a fields || (( ${#fields[@]} > 0 )); do
  regions+=("${fields[@]}")
done < "$regionfile"

(( ${#regions[@]} > 0 )) || die "$0: no regions listed in $regionfile"

# Resolve every input up front so a missing region cannot silently yield a
# truncated (but validly indexed) output.
files=()
for region in "${regions[@]}"; do
  file=$mergedir/$region.vcf.gz
  [[ -r "$file" ]] || die "$0: missing or unreadable input: $file"
  files+=("$file")
done

# pee runs each command string through a shell of its own; hand the output
# names over via the environment so they are quoted there rather than being
# spliced into the command text.
export vcfgenotypes vcfsites

{
  # Header: every leading '#' line of the first file, however long the header
  # is; stop at the first record.
  zcat "${files[0]}" | awk '!/^#/ { exit } { print }'

  # Body: the records of each region file, in region-file order.
  for file in "${files[@]}"; do
    zcat "$file" | grep -v '^#'
  done
} | uniq | pee \
  'bgzip >"$vcfgenotypes" && tabix -p vcf "$vcfgenotypes"' \
  'cut -f -8 | bgzip >"$vcfsites" && tabix -p vcf "$vcfsites"'
# ^ uniq collapses adjacent identical lines in the combined stream.
# shellcheck disable=SC2016 # (above) variables are expanded by pee's child shell on purpose