1 /*
2     vcflib C++ library for parsing and manipulating VCF files
3 
4     Copyright © 2010-2020 Erik Garrison
5     Copyright © 2020      Pjotr Prins
6 
7     This software is published under the MIT License. See the LICENSE file.
8 */
9 
10 #include "Variant.h"
11 #include "split.h"
12 #include "Fasta.h"
13 #include "gpatInfo.hpp"
14 #include <getopt.h>
15 
16 using namespace std;
17 using namespace vcflib;
18 
printSummary(char ** argv)19 void printSummary(char** argv) {
20     cerr << "usage: " << argv[0] << " [options] <vcf file>" << endl
21          << endl
22          << "Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file." << endl
23          << "options:" << endl
24          << "    -f, --fasta-reference  FASTA reference file to use to obtain primer sequences" << endl
25          << "    -x, --exclude-failures If a record fails, don't print it.  Otherwise do." << endl
26          << "    -k, --keep-failures    Print if the record fails, otherwise not." << endl
27 	 << "    -h, --help       Print this message." << endl
28 	 << "    -v, --version    Print version." << endl
29          << endl;
30       cerr << endl << "Type: metrics" << endl << endl;
31     exit(0);
32 }
33 
34 
main(int argc,char ** argv)35 int main(int argc, char** argv) {
36 
37     int c;
38     string fastaRef;
39     bool keepFailures = false;
40     bool excludeFailures = false;
41 
42     if (argc == 1)
43         printSummary(argv);
44 
45     while (true) {
46         static struct option long_options[] =
47             {
48                 /* These options set a flag. */
49                 //{"verbose", no_argument,       &verbose_flag, 1},
50                 {"help", no_argument, 0, 'h'},
51                 {"fasta-reference",  required_argument, 0, 'f'},
52                 {"exclude-failures",  no_argument, 0, 'x'},
53                 {"keep-failures",  no_argument, 0, 'k'},
54                 {"version",  no_argument, 0, 'v'},
55                 //{"length",  no_argument, &printLength, true},
56                 {0, 0, 0, 0}
57             };
58         /* getopt_long stores the option index here. */
59         int option_index = 0;
60 
61         c = getopt_long (argc, argv, "hvxkf:",
62                          long_options, &option_index);
63 
64         /* Detect the end of the options. */
65         if (c == -1)
66             break;
67 
68         switch (c)
69         {
70         case 0:
71             /* If this option set a flag, do nothing else now. */
72             if (long_options[option_index].flag != 0)
73                 break;
74             printf ("option %s", long_options[option_index].name);
75             if (optarg)
76                 printf (" with arg %s", optarg);
77             printf ("\n");
78             break;
79 
80         case 'f':
81 	  {
82             fastaRef = optarg;
83             break;
84 	  }
85 	case 'v':
86 	  {
87 	    printBasicVersion();
88 	    exit(0);
89 	  }
90         case 'x':
91 	  {
92             excludeFailures = true;
93             break;
94 	  }
95         case 'k':
96 	  {
97             keepFailures = true;
98             break;
99 	  }
100         case 'h':
101 	  {
102             printSummary(argv);
103             exit(0);
104             break;
105 	  }
106         case '?':
107 	  {
108             /* getopt_long already printed an error message. */
109             printSummary(argv);
110             exit(1);
111             break;
112 	  }
113         default:
114             abort ();
115         }
116     }
117 
118     if (fastaRef.empty()) {
119         cerr << "a FASTA reference sequence must be specified" << endl;
120         exit(1);
121     }
122 
123     FastaReference ref;
124     ref.open(fastaRef);
125 
126     VariantCallFile variantFile;
127     string inputFilename;
128     if (optind == argc - 1) {
129         inputFilename = argv[optind];
130         variantFile.open(inputFilename);
131     } else {
132         variantFile.open(std::cin);
133     }
134 
135     if (!variantFile.is_open()) {
136         return 1;
137     }
138 
139     if (keepFailures || excludeFailures) {
140         cout << variantFile.header << endl;
141     }
142 
143     Variant var(variantFile);
144     while (variantFile.getNextVariant(var)) {
145         int refstart = var.position - 1; // convert to 0-based
146         string matchedRef = ref.getSubSequence(var.sequenceName, refstart, var.ref.size());
147         if (var.ref != matchedRef) {
148             if (keepFailures) {
149                 cout << var << endl;
150             } else if (!excludeFailures) {
151                 cout << "mismatched reference " << var.ref << " should be " << matchedRef << " at "
152                      << var.sequenceName << ":" << var.position << endl;
153             }
154         } else if (excludeFailures) {
155             cout << var << endl;
156         }
157     }
158 
159     return 0;
160 
161 }
162