1#!/usr/local/bin/perl
2#
3#   Copyright (C) 2010 Broad Institute.
4#   Copyright (C) 2011, 2014, 2017 Genome Research Ltd.
5#
6#   Author: Heng Li <lh3@sanger.ac.uk>
7#
8# Permission is hereby granted, free of charge, to any person obtaining a copy
9# of this software and associated documentation files (the "Software"), to deal
10# in the Software without restriction, including without limitation the rights
11# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12# copies of the Software, and to permit persons to whom the Software is
13# furnished to do so, subject to the following conditions:
14#
15# The above copyright notice and this permission notice shall be included in
16# all copies or substantial portions of the Software.
17#
18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24# DEALINGS IN THE SOFTWARE.
25
26use strict;
27use warnings;
28use Getopt::Std;
29
30&main;
31exit;
32
33sub main {
34  &usage if (@ARGV < 1);
35  my $command = shift(@ARGV);
36  my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter,
37              hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats,
38              gapstats=>\&gapstats, splitchr=>\&splitchr, vcf2fq=>\&vcf2fq);
39  die("Unknown command \"$command\".\n") if (!defined($func{$command}));
40  &{$func{$command}};
41}
42
43sub splitchr {
44  my %opts = (l=>5000000);
45  getopts('l:', \%opts);
46  my $l = $opts{l};
47  die(qq/Usage: vcfutils.pl splitchr [-l $opts{l}] <in.fa.fai>\n/) if (@ARGV == 0 && -t STDIN);
48  while (<>) {
49    my @t = split;
50    my $last = 0;
51    for (my $i = 0; $i < $t[1];) {
52      my $e = ($t[1] - $i) / $l < 1.1? $t[1] : $i + $l;
53      print "$t[0]:".($i+1)."-$e\n";
54      $i = $e;
55    }
56  }
57}
58
59sub subsam {
60  die(qq/Usage: vcfutils.pl subsam <in.vcf> [samples]\n/) if (@ARGV == 0);
61  my ($fh, %h);
62  my $fn = shift(@ARGV);
63  my @col;
64  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
65  $h{$_} = 1 for (@ARGV);
66  while (<$fh>) {
67    if (/^##/) {
68      print;
69    } elsif (/^#/) {
70      my @t = split;
71      my @s = @t[0..8]; # all fixed fields + FORMAT
72      for (9 .. $#t) {
73        if ($h{$t[$_]}) {
74          push(@s, $t[$_]);
75          push(@col, $_);
76        }
77      }
78      pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field
79      print join("\t", @s), "\n";
80    } else {
81      my @t = split;
82      if (@col == 0) {
83        print join("\t", @t[0..7]), "\n";
84      } else {
85        print join("\t", @t[0..8], map {$t[$_]} @col), "\n";
86      }
87    }
88  }
89  close($fh);
90}
91
92sub listsam {
93  die(qq/Usage: vcfutils.pl listsam <in.vcf>\n/) if (@ARGV == 0 && -t STDIN);
94  while (<>) {
95    if (/^#/ && !/^##/) {
96      my @t = split;
97      print join("\n", @t[9..$#t]), "\n";
98      exit;
99    }
100  }
101}
102
103sub fillac {
104  die(qq/Usage: vcfutils.pl fillac <in.vcf>\n\nNote: The GT field MUST BE present and always appear as the first field.\n/) if (@ARGV == 0 && -t STDIN);
105  while (<>) {
106    if (/^#/) {
107      print;
108    } else {
109      my @t = split;
110      my @c = (0, 0);
111      my $n = 0;
112      my $s = -1;
113      @_ = split(":", $t[8]);
114      for (0 .. $#_) {
115        if ($_[$_] eq 'GT') { $s = $_; last; }
116      }
117      if ($s < 0) {
118        print join("\t", @t), "\n";
119        next;
120      }
121      for (9 .. $#t) {
122        if ($t[$_] =~ /^0,0,0/) {
123        } elsif ($t[$_] =~ /^([^\s:]+:){$s}(\d+).(\d+)/) {
124          ++$c[$2]; ++$c[$3];
125          $n += 2;
126        }
127      }
128      my $AC = "AC=" . join("\t", @c[1..$#c]) . ";AN=$n";
129      my $info = $t[7];
130      $info =~ s/(;?)AC=(\d+)//;
131      $info =~ s/(;?)AN=(\d+)//;
132      if ($info eq '.') {
133        $info = $AC;
134      } else {
135        $info .= ";$AC";
136      }
137      $t[7] = $info;
138      print join("\t", @t), "\n";
139    }
140  }
141}
142
143sub ldstats {
144  my %opts = (t=>0.9);
145  getopts('t:', \%opts);
146  die("Usage: vcfutils.pl ldstats [-t $opts{t}] <in.vcf>\n") if (@ARGV == 0 && -t STDIN);
147  my $cutoff = $opts{t};
148  my ($last, $lastchr) = (0x7fffffff, '');
149  my ($x, $y, $n) = (0, 0, 0);
150  while (<>) {
151    if (/^([^#\s]+)\s(\d+)/) {
152      my ($chr, $pos) = ($1, $2);
153      if (/NEIR=([\d\.]+)/) {
154        ++$n;
155        ++$y, $x += $pos - $last if ($lastchr eq $chr && $pos > $last && $1 > $cutoff);
156      }
157      $last = $pos; $lastchr = $chr;
158    }
159  }
160  print "Number of SNP intervals in strong LD (r > $opts{t}): $y\n";
161  print "Fraction: ", $y/$n, "\n";
162  print "Length: $x\n";
163}
164
165sub qstats {
166  my %opts = (r=>'', s=>0.02, v=>undef);
167  getopts('r:s:v', \%opts);
168  die("Usage: vcfutils.pl qstats [-r ref.vcf] <in.vcf>\n
169Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN);
170  my %ts = (AG=>1, GA=>1, CT=>1, TC=>1);
171  my %h = ();
172  my $is_vcf = defined($opts{v})? 1 : 0;
173  if ($opts{r}) { # read the reference positions
174    my $fh;
175    open($fh, $opts{r}) || die;
176    while (<$fh>) {
177      next if (/^#/);
178      if ($is_vcf) {
179        my @t = split;
180        $h{$t[0],$t[1]} = $t[4];
181      } else {
182        $h{$1,$2} = 1 if (/^(\S+)\s+(\d+)/);
183      }
184    }
185    close($fh);
186  }
187  my $hsize = scalar(keys %h);
188  my @a;
189  while (<>) {
190    next if (/^#/);
191    my @t = split;
192    next if (length($t[3]) != 1 || uc($t[3]) eq 'N');
193    $t[3] = uc($t[3]); $t[4] = uc($t[4]);
194    my @s = split(',', $t[4]);
195    $t[5] = 3 if ($t[5] eq '.' || $t[5] < 0);
196    next if (length($s[0]) != 1);
197    my $hit;
198    if ($is_vcf) {
199      $hit = 0;
200      my $aa = $h{$t[0],$t[1]};
201      if (defined($aa)) {
202        my @aaa = split(",", $aa);
203        for (@aaa) {
204          $hit = 1 if ($_ eq $s[0]);
205        }
206      }
207    } else {
208      $hit = defined($h{$t[0],$t[1]})? 1 : 0;
209    }
210    push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $hit]);
211  }
212  push(@a, [-1, 0, 0, 0]); # end marker
213  die("[qstats] No SNP data!\n") if (@a == 0);
214  @a = sort {$b->[0]<=>$a->[0]} @a;
215  my $next = $opts{s};
216  my $last = $a[0];
217  my @c = (0, 0, 0, 0);
218  my @lc;
219  $lc[1] = $lc[2] = 0;
220  for my $p (@a) {
221    if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) {
222      my @x;
223      $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100);
224      $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0);
225      $x[2] = sprintf("%.4f", $c[3] / $c[1]);
226      my $a = $c[1] - $lc[1];
227      my $b = $c[2] - $lc[2];
228      $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100);
229      print join("\t", $last, @c, @x), "\n";
230      $next = $c[0]/@a + $opts{s};
231      $lc[1] = $c[1]; $lc[2] = $c[2];
232    }
233    ++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3];
234    $last = $p->[0];
235  }
236}
237
238sub varFilter {
239  my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4);
240  getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts);
241  die(qq/
242Usage:   vcfutils.pl varFilter [options] <in.vcf>
243
244Options: -Q INT    minimum RMS mapping quality for SNPs [$opts{Q}]
245         -d INT    minimum read depth [$opts{d}]
246         -D INT    maximum read depth [$opts{D}]
247         -a INT    minimum number of alternate bases [$opts{a}]
248         -w INT    SNP within INT bp around a gap to be filtered [$opts{w}]
249         -W INT    window size for filtering adjacent gaps [$opts{W}]
250         -1 FLOAT  min P-value for strand bias (given PV4) [$opts{1}]
251         -2 FLOAT  min P-value for baseQ bias [$opts{2}]
252         -3 FLOAT  min P-value for mapQ bias [$opts{3}]
253         -4 FLOAT  min P-value for end distance bias [$opts{4}]
254         -e FLOAT  min P-value for HWE (plus F<0) [$opts{e}]
255         -p        print filtered variants
256
257Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools.
258\n/) if (@ARGV == 0 && -t STDIN);
259
260  # calculate the window size
261  my ($ol, $ow) = ($opts{W}, $opts{w});
262  my $max_dist = $ol > $ow? $ol : $ow;
263  # the core loop
264  my @staging; # (indel_filtering_score, flt_tag, indel_span; chr, pos, ...)
265  while (<>) {
266    my @t = split;
267    if (/^#/) {
268      print; next;
269    }
270    next if ($t[4] eq '.'); # skip non-var sites
271    next if ($t[3] eq 'N'); # skip sites with unknown ref ('N')
272    # check if the site is a SNP
273    my $type = 1; # SNP
274    if (length($t[3]) > 1) {
275      $type = 2; # MNP
276      my @s = split(',', $t[4]);
277      for (@s) {
278        $type = 3 if (length != length($t[3]));
279      }
280    } else {
281      my @s = split(',', $t[4]);
282      for (@s) {
283        $type = 3 if (length > 1);
284      }
285    }
286    # clear the out-of-range elements
287    while (@staging) {
288      # Still on the same chromosome and the first element's window still affects this position?
289      last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]);
290      varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much
291    }
292    my $flt = 0;
293    # parse annotations
294    my ($dp, $mq, $dp_alt) = (-1, -1, -1);
295    if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
296      $dp = $1 + $2 + $3 + $4;
297      $dp_alt = $3 + $4;
298    }
299    if ($t[7] =~ /DP=(\d+)/i) {
300      $dp = $1;
301    }
302    $mq = $1 if ($t[7] =~ /MQ=(\d+)/i);
303    # the depth and mapQ filter
304    if ($dp >= 0) {
305      if ($dp < $opts{d}) {
306        $flt = 2;
307      } elsif ($dp > $opts{D}) {
308        $flt = 3;
309      }
310    }
311    $flt = 4 if ($dp_alt >= 0 && $dp_alt < $opts{a});
312    $flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q});
313    $flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/
314                 && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4}));
315    $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S})));
316    # HWE filter
317    if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) {
318        my $p = 2*$1 + $2;
319        my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0;
320        $flt = 9 if ($f < 0);
321    }
322
323    my $score = $t[5] * 100 + $dp_alt;
324    my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs
325    if ($flt == 0) {
326      if ($type == 3) { # an indel
327        # filtering SNPs and MNPs
328        for my $x (@staging) {
329          next if (($x->[0]&3) == 3 || $x->[1] || $x->[4] + $x->[2] + $ow < $t[1]);
330          $x->[1] = 5;
331        }
332        # check the staging list for indel filtering
333        for my $x (@staging) {
334          next if (($x->[0]&3) != 3 || $x->[1] || $x->[4] + $x->[2] + $ol < $t[1]);
335          if ($x->[0]>>2 < $score) {
336            $x->[1] = 6;
337          } else {
338            $flt = 6; last;
339          }
340        }
341      } else { # SNP or MNP
342        for my $x (@staging) {
343          next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]);
344          if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1)
345              && length($x->[7]) - length($x->[6]) == 1) {
346            $x->[1] = 5;
347          } else { $flt = 5; }
348          last;
349        }
350        # check MNP
351        for my $x (@staging) {
352          next if (($x->[0]&3) == 3 || $x->[4] + $x->[2] < $t[1]);
353          if ($x->[0]>>2 < $score) {
354            $x->[1] = 8;
355          } else {
356            $flt = 8; last;
357          }
358        }
359      }
360    }
361    push(@staging, [$score<<2|$type, $flt, $rlen, @t]);
362  }
363  # output the last few elements in the staging list
364  while (@staging) {
365    varFilter_aux(shift @staging, $opts{p});
366  }
367}
368
369sub varFilter_aux {
370  my ($first, $is_print) = @_;
371  if ($first->[1] == 0) {
372    print join("\t", @$first[3 .. @$first-1]), "\n";
373  } elsif ($is_print) {
374    print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n";
375  }
376}
377
378sub gapstats {
379  my (@c0, @c1);
380  $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
381  while (<>) {
382    next if (/^#/);
383    my @t = split;
384    next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
385    my @s = split(',', $t[4]);
386    for my $x (@s) {
387      my $l = length($x) - length($t[3]) + 5000;
388      if ($x =~ /^-/) {
389        $l = -(length($x) - 1) + 5000;
390      } elsif ($x =~ /^\+/) {
391        $l = length($x) - 1 + 5000;
392      }
393      $c0[$l] += 1 / @s;
394    }
395  }
396  for (my $i = 0; $i < 10000; ++$i) {
397    next if ($c0[$i] == 0);
398    $c1[0] += $c0[$i];
399    $c1[1] += $c0[$i] if (($i-5000)%3 == 0);
400    printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
401  }
402  printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
403}
404
405sub ucscsnp2vcf {
406  die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN);
407  print "##fileformat=VCFv4.0\n";
408  print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"), "\n";
409  while (<>) {
410    my @t = split("\t");
411    my $indel = ($t[9] =~ /^[ACGT](\/[ACGT])+$/)? 0 : 1;
412    my $pos = $t[2] + 1;
413    my @alt;
414    push(@alt, $t[7]);
415    if ($t[6] eq '-') {
416      $t[9] = reverse($t[9]);
417      $t[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/;
418    }
419    my @a = split("/", $t[9]);
420    for (@a) {
421      push(@alt, $_) if ($_ ne $alt[0]);
422    }
423    if ($indel) {
424      --$pos;
425      for (0 .. $#alt) {
426        $alt[$_] =~ tr/-//d;
427        $alt[$_] = "N$alt[$_]";
428      }
429    }
430    my $ref = shift(@alt);
431    my $af = $t[13] > 0? ";AF=$t[13]" : '';
432    my $valid = ($t[12] eq 'unknown')? '' : ";valid=$t[12]";
433    my $info = "molType=$t[10];class=$t[11]$valid$af";
434    print join("\t", $t[1], $pos, $t[4], $ref, join(",", @alt), 0, '.', $info), "\n";
435  }
436}
437
438sub hapmap2vcf {
439  die("Usage: vcfutils.pl <in.ucsc.snp> <in.hapmap>\n") if (@ARGV == 0);
440  my $fn = shift(@ARGV);
441  # parse UCSC SNP
442  warn("Parsing UCSC SNPs...\n");
443  my ($fh, %map);
444  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
445  while (<$fh>) {
446    my @t = split;
447    next if ($t[3] - $t[2] != 1); # not SNP
448    @{$map{$t[4]}} = @t[1,3,7];
449  }
450  close($fh);
451  # write VCF
452  warn("Writing VCF...\n");
453  print "##fileformat=VCFv4.0\n";
454  while (<>) {
455    my @t = split;
456    if ($t[0] eq 'rs#') { # the first line
457      print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", @t[11..$#t]), "\n";
458    } else {
459      next unless ($map{$t[0]});
460      next if (length($t[1]) != 3); # skip non-SNPs
461      my $a = \@{$map{$t[0]}};
462      my $ref = $a->[2];
463      my @u = split('/', $t[1]);
464      if ($u[1] eq $ref) {
465        $u[1] = $u[0]; $u[0] = $ref;
466      } elsif ($u[0] ne $ref) { next; }
467      my $alt = $u[1];
468      my %w;
469      $w{$u[0]} = 0; $w{$u[1]} = 1;
470      my @s = (@$a[0,1], $t[0], $ref, $alt, 0, '.', '.', 'GT');
471      my $is_tri = 0;
472      for (@t[11..$#t]) {
473        if ($_ eq 'NN') {
474          push(@s, './.');
475        } else {
476          my @a = ($w{substr($_,0,1)}, $w{substr($_,1,1)});
477          if (!defined($a[0]) || !defined($a[1])) {
478            $is_tri = 1;
479            last;
480          }
481          push(@s, "$a[0]/$a[1]");
482        }
483      }
484      next if ($is_tri);
485      print join("\t", @s), "\n";
486    }
487  }
488}
489
490sub vcf2fq {
491  my %opts = (d=>3, D=>100000, Q=>10, l=>5);
492  getopts('d:D:Q:l:', \%opts);
493  die(qq/
494Usage:   vcfutils.pl vcf2fq [options] <all-site.vcf>
495
496Options: -d INT    minimum depth          [$opts{d}]
497         -D INT    maximum depth          [$opts{D}]
498         -Q INT    min RMS mapQ           [$opts{Q}]
499         -l INT    INDEL filtering window [$opts{l}]
500\n/) if (@ARGV == 0 && -t STDIN);
501
502  my ($last_chr, $seq, $qual, $last_pos, @gaps);
503  my $_Q = $opts{Q};
504  my $_d = $opts{d};
505  my $_D = $opts{D};
506
507  my %het = (AC=>'M', AG=>'R', AT=>'W', CA=>'M', CG=>'S', CT=>'Y',
508             GA=>'R', GC=>'S', GT=>'K', TA=>'W', TC=>'Y', TG=>'K');
509
510  $last_chr = '';
511  while (<>) {
512    next if (/^#/);
513    my @t = split;
514    if ($last_chr ne $t[0]) {
515      &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr);
516      ($last_chr, $last_pos) = ($t[0], 0);
517      $seq = $qual = '';
518      @gaps = ();
519    }
520    die("[vcf2fq] unsorted input\n") if ($t[1] - $last_pos < 0);
521    if ($t[1] - $last_pos > 1) {
522      $seq .= 'n' x ($t[1] - $last_pos - 1);
523      $qual .= '!' x ($t[1] - $last_pos - 1);
524    }
525    if (length($t[3]) == 1 && $t[7] !~ /INDEL/ && $t[4] =~ /^([A-Za-z.])(,[A-Za-z])*$/) { # a SNP or reference
526      my ($ref, $alt) = ($t[3], $1);
527      my ($b, $q);
528      $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/);
529      if ($q < 0) {
530        $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0;
531        $b = ($_ < .5 || $alt eq '.')? $ref : $alt;
532        $q = -$q;
533      } else {
534        $b = $het{"$ref$alt"};
535        $b ||= 'N';
536      }
537      $b = lc($b);
538      $b = uc($b) if (($t[7] =~ /MQ=(\d+)/ && $1 >= $_Q) && ($t[7] =~ /DP=(\d+)/ && $1 >= $_d && $1 <= $_D));
539      $q = int($q + 33 + .499);
540      $q = chr($q <= 126? $q : 126);
541      $seq .= $b;
542      $qual .= $q;
543    } elsif ($t[4] ne '.') { # an INDEL
544      push(@gaps, [$t[1], length($t[3])]);
545    }
546    $last_pos = $t[1];
547  }
548  &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l});
549}
550
551sub v2q_post_process {
552  my ($chr, $seq, $qual, $gaps, $l) = @_;
553  for my $g (@$gaps) {
554    my $beg = $g->[0] > $l? $g->[0] - $l : 0;
555    my $end = $g->[0] + $g->[1] + $l;
556    $end = length($$seq) if ($end > length($$seq));
557    substr($$seq, $beg, $end - $beg) = lc(substr($$seq, $beg, $end - $beg));
558  }
559  print "\@$chr\n"; &v2q_print_str($seq);
560  print "+\n"; &v2q_print_str($qual);
561}
562
563sub v2q_print_str {
564  my ($s) = @_;
565  my $l = length($$s);
566  for (my $i = 0; $i < $l; $i += 60) {
567    print substr($$s, $i, 60), "\n";
568  }
569}
570
571sub usage {
572  die(qq/
573Usage:   vcfutils.pl <command> [<arguments>]\n
574Command: subsam       get a subset of samples
575         listsam      list the samples
576         fillac       fill the allele count field
577         qstats       SNP stats stratified by QUAL
578
579         hapmap2vcf   convert the hapmap format to VCF
580         ucscsnp2vcf  convert UCSC SNP SQL dump to VCF
581
582         varFilter    filtering short variants (*)
583         vcf2fq       VCF->fastq (**)
584
585Notes: Commands with description endting with (*) may need bcftools
586       specific annotations.
587\n/);
588}
589