#!/bin/bash
#
# Concatenate per-region VCF files into a single genotype VCF and a single
# sites-only VCF (first 8 columns), both bgzip-compressed and tabix-indexed.
#
# Arguments:
#   $1 - region file: whitespace-separated region names, in output order
#   $2 - directory containing [region].vcf.gz for every listed region
#
# Outputs (written to the current working directory):
#   <basename of $2>.vcf.gz        (+ tabix index)
#   <basename of $2>.sites.vcf.gz  (+ tabix index)
#
# Requires: zcat, bgzip, tabix, and pee (from moreutils).
# Exit status: 1 on usage/validation errors, otherwise the status of pee.

# Print the usage text to stderr and exit with status 1.
usage() {
  cat >&2 <<EOF
Usage: $0 [region file] [directory]

Generates \`basename directory\`.vcf.gz and \`basename directory\`.sites.vcf.gz
which are the concatenation of files in the directory named [directory]/[region1].vcf.gz,
[directory]/[region2].vcf.gz, etc. in the order in which they occur in the region file.

Tabix indexes are simultaneously generated.
EOF
  exit 1
}

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s: %s\n' "$0" "$*" >&2
  exit 1
}

[[ $# -eq 2 ]] || usage

regionfile=$1
mergedir=$2
# basename (rather than ${mergedir##*/}) so a trailing slash is tolerated.
mergename=$(basename -- "$mergedir")

# Exported so the shell commands spawned by pee can refer to the output names
# as quoted variables instead of having them spliced into the command text.
export vcfgenotypes="$mergename.vcf.gz"
export vcfsites="$mergename.sites.vcf.gz"

# Fail before touching any output if a required tool is missing.
for tool in zcat bgzip tabix pee; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

[[ -r "$regionfile" ]] || die "cannot read region file: $regionfile"

# Split the whole region file on whitespace (spaces, tabs, newlines) into an
# array, without glob expansion. With -d '' read hits EOF before finding a
# delimiter and therefore returns non-zero; that is expected here.
regions=()
read -r -d '' -a regions < "$regionfile"
(( ${#regions[@]} > 0 )) || die "no regions listed in $regionfile"

# Build the ordered input list and verify every file up front, so a missing
# region cannot silently produce an incomplete merged VCF.
files=()
for region in "${regions[@]}"; do
  file="$mergedir/$region.vcf.gz"
  [[ -r "$file" ]] || die "missing or unreadable input: $file"
  files+=("$file")
done

{
  # Header: every leading '#' line of the first file, however long the header
  # is; stop reading at the first record line.
  zcat < "${files[0]}" | awk '/^#/ { print; next } { exit }'

  # Records: all non-header lines of each file, in region-file order.
  for file in "${files[@]}"; do
    zcat < "$file" | grep -v '^#'
  done
} | uniq | pee \
  'bgzip >"$vcfgenotypes" && tabix -p vcf "$vcfgenotypes"' \
  'cut -f -8 | bgzip >"$vcfsites" && tabix -p vcf "$vcfsites"'
# uniq drops identical adjacent lines from the combined stream; pee feeds the
# same stream to both commands so each output is written and then indexed.