1 /*
2 vcflib C++ library for parsing and manipulating VCF files
3
4 Copyright © 2010-2020 Erik Garrison
5 Copyright © 2020 Pjotr Prins
6
7 This software is published under the MIT License. See the LICENSE file.
8 */
9
10 #include "Variant.h"
11 #include "split.h"
12 #include "Fasta.h"
13 #include "gpatInfo.hpp"
14 #include <getopt.h>
15
16 using namespace std;
17 using namespace vcflib;
18
printSummary(char ** argv)19 void printSummary(char** argv) {
20 cerr << "usage: " << argv[0] << " [options] <vcf file>" << endl
21 << endl
22 << "Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file." << endl
23 << "options:" << endl
24 << " -f, --fasta-reference FASTA reference file to use to obtain primer sequences" << endl
25 << " -x, --exclude-failures If a record fails, don't print it. Otherwise do." << endl
26 << " -k, --keep-failures Print if the record fails, otherwise not." << endl
27 << " -h, --help Print this message." << endl
28 << " -v, --version Print version." << endl
29 << endl;
30 cerr << endl << "Type: metrics" << endl << endl;
31 exit(0);
32 }
33
34
main(int argc,char ** argv)35 int main(int argc, char** argv) {
36
37 int c;
38 string fastaRef;
39 bool keepFailures = false;
40 bool excludeFailures = false;
41
42 if (argc == 1)
43 printSummary(argv);
44
45 while (true) {
46 static struct option long_options[] =
47 {
48 /* These options set a flag. */
49 //{"verbose", no_argument, &verbose_flag, 1},
50 {"help", no_argument, 0, 'h'},
51 {"fasta-reference", required_argument, 0, 'f'},
52 {"exclude-failures", no_argument, 0, 'x'},
53 {"keep-failures", no_argument, 0, 'k'},
54 {"version", no_argument, 0, 'v'},
55 //{"length", no_argument, &printLength, true},
56 {0, 0, 0, 0}
57 };
58 /* getopt_long stores the option index here. */
59 int option_index = 0;
60
61 c = getopt_long (argc, argv, "hvxkf:",
62 long_options, &option_index);
63
64 /* Detect the end of the options. */
65 if (c == -1)
66 break;
67
68 switch (c)
69 {
70 case 0:
71 /* If this option set a flag, do nothing else now. */
72 if (long_options[option_index].flag != 0)
73 break;
74 printf ("option %s", long_options[option_index].name);
75 if (optarg)
76 printf (" with arg %s", optarg);
77 printf ("\n");
78 break;
79
80 case 'f':
81 {
82 fastaRef = optarg;
83 break;
84 }
85 case 'v':
86 {
87 printBasicVersion();
88 exit(0);
89 }
90 case 'x':
91 {
92 excludeFailures = true;
93 break;
94 }
95 case 'k':
96 {
97 keepFailures = true;
98 break;
99 }
100 case 'h':
101 {
102 printSummary(argv);
103 exit(0);
104 break;
105 }
106 case '?':
107 {
108 /* getopt_long already printed an error message. */
109 printSummary(argv);
110 exit(1);
111 break;
112 }
113 default:
114 abort ();
115 }
116 }
117
118 if (fastaRef.empty()) {
119 cerr << "a FASTA reference sequence must be specified" << endl;
120 exit(1);
121 }
122
123 FastaReference ref;
124 ref.open(fastaRef);
125
126 VariantCallFile variantFile;
127 string inputFilename;
128 if (optind == argc - 1) {
129 inputFilename = argv[optind];
130 variantFile.open(inputFilename);
131 } else {
132 variantFile.open(std::cin);
133 }
134
135 if (!variantFile.is_open()) {
136 return 1;
137 }
138
139 if (keepFailures || excludeFailures) {
140 cout << variantFile.header << endl;
141 }
142
143 Variant var(variantFile);
144 while (variantFile.getNextVariant(var)) {
145 int refstart = var.position - 1; // convert to 0-based
146 string matchedRef = ref.getSubSequence(var.sequenceName, refstart, var.ref.size());
147 if (var.ref != matchedRef) {
148 if (keepFailures) {
149 cout << var << endl;
150 } else if (!excludeFailures) {
151 cout << "mismatched reference " << var.ref << " should be " << matchedRef << " at "
152 << var.sequenceName << ":" << var.position << endl;
153 }
154 } else if (excludeFailures) {
155 cout << var << endl;
156 }
157 }
158
159 return 0;
160
161 }
162