1#!/bin/bash
# Reduce (concatenate) the per-region VCFs listed in a region file into one VCF, bgzip it and create a tabix index
3#
# Exactly two arguments are required: the region file and the directory
# holding the per-region VCFs. With any other argument count, print the
# usage text and exit with status 1.
if (( $# != 2 )); then
    printf '%s\n' \
        "Usage: $0 [region file] [directory]" \
        "" \
        'Generates `basename directory`.vcf.gz, which is the concatenation' \
        "of files in the directory named [directory]/[region1].vcf.gz," \
        "[directory]/[region2].vcf.gz, etc. in the order in which they" \
        "occur in the region file." \
        "" \
        "A tabix index is subsequently generated."
    exit 1
fi
16
# Merge step.
#
# Inputs:
#   $1  region file: whitespace-separated region names (normally one per
#       line), listed in the order the regions should appear in the output.
#   $2  directory containing one <region>.vcf.gz per region name.
#
# Output (written to the current working directory):
#   <basename of $2>.vcf.gz      VCF header of the first region file followed
#                                by the data lines of every region file
#   plus the tabix index that `tabix -p vcf` creates for it.
#
# Exits non-zero, without touching any existing output file, if the region
# file is unreadable or empty, if any per-region VCF is missing, or if
# decompression or compression fails.

# Print an error message to stderr and abort with status 1.
die() {
    printf '%s: %s\n' "${0##*/}" "$*" >&2
    exit 1
}

regionfile=$1
mergedir=$2
mergename=$(basename -- "$mergedir")
vcfgenotypes=$mergename.vcf.gz
#vcfsites=$mergename.sites.vcf.gz

if [[ ! -r "$regionfile" || -d "$regionfile" ]]; then
    die "cannot read region file: $regionfile"
fi

# Collect region names. Splitting on whitespace (and skipping blank lines)
# matches how the names were previously tokenised, but without glob
# expansion. The `|| (( ... ))` clause keeps a final line that lacks a
# trailing newline.
regions=()
while read -r -a fields || (( ${#fields[@]} > 0 )); do
    regions+=("${fields[@]}")
done < "$regionfile"

(( ${#regions[@]} > 0 )) || die "no regions listed in $regionfile"

# Resolve every input up front so that a missing file aborts the run
# before any output is produced, instead of yielding a silently
# incomplete merged VCF.
files=()
for region in "${regions[@]}"; do
    file=$mergedir/$region.vcf.gz
    [[ -r "$file" ]] || die "missing or unreadable VCF: $file"
    files+=("$file")
done

# Build the result under a temporary name and rename it only on success,
# so a failed run never leaves a truncated file under the final name.
tmpfile=$vcfgenotypes.tmp.$$
trap 'rm -f -- "$tmpfile"' EXIT

(
    # Header: the leading run of '#' lines of the first file, however long
    # it is. awk stops at the first data line, so the rest of the file is
    # not decompressed here; zcat's exit status is deliberately not checked
    # for this pipeline because it may be ended early by the closed pipe.
    # The same file is read in full (and checked) in the loop below.
    zcat -- "${files[0]}" | awk '/^#/ { print; next } { exit }'

    # Data lines of every file, in region-file order.
    for file in "${files[@]}"; do
        zcat -- "$file" | grep -v '^#'
        # Only zcat's status matters: grep -v exits 1 when a file contains
        # no data lines at all, which is not an error.
        zcat_status=${PIPESTATUS[0]}
        if (( zcat_status != 0 )); then
            printf '%s: zcat failed (status %d) on %s\n' \
                "${0##*/}" "$zcat_status" "$file" >&2
            exit 1
        fi
    done
) | bgzip > "$tmpfile"
status=("${PIPESTATUS[@]}")

(( status[0] == 0 )) || die "failed while reading the per-region VCFs"
(( status[1] == 0 )) || die "bgzip failed while writing $tmpfile"

mv -f -- "$tmpfile" "$vcfgenotypes" || die "cannot create $vcfgenotypes"
tabix -p vcf "$vcfgenotypes" || die "tabix failed to index $vcfgenotypes"
31