1 #include "Variant.h"
2 #include "split.h"
3 #include <string>
4 #include <sstream>
5 #include <iostream>
6 #include <getopt.h>
7 
8 using namespace std;
9 using namespace vcflib;
10 
11 
main(int argc,char ** argv)12 int main(int argc, char** argv) {
13 
14     if (argc > 1 && (argv[1] == "-h" || argv[1] == "--help")) {
15         cerr << "usage: " << argv[0] << " <[input file] >[output vcf]" << endl
16              << "Adds summary statistics to each record summarizing qualities reported in" << endl
17              << "called genotypes.  Uses:" << endl
18              << "RO (reference observation count), QR (quality sum reference observations)" << endl
19              << "AO (alternate observation count), QA (quality sum alternate observations)" << endl;
20         return 1;
21     }
22 
23     VariantCallFile variantFile;
24     if (argc == 1) {
25         variantFile.open(std::cin);
26     } else {
27         string filename = argv[argc-1];
28         variantFile.open(filename);
29         if (!variantFile.is_open()) {
30             cerr << "could not open " << filename << endl;
31             return 1;
32         }
33     }
34 
35     Variant var(variantFile);
36 
37     variantFile.removeInfoHeaderLine("AQR");
38     variantFile.addHeaderLine("##INFO=<ID=AQR,Number=1,Type=Float,Description=\"Mean reference observation quality calculated by RO and QR in called samples.\">");
39     variantFile.removeInfoHeaderLine("AQA");
40     variantFile.addHeaderLine("##INFO=<ID=AQA,Number=A,Type=Float,Description=\"Mean alternate observation quality calculated by AO and QA in called samples.\">");
41     variantFile.removeInfoHeaderLine("QR");
42     variantFile.addHeaderLine("##INFO=<ID=QR,Number=1,Type=Float,Description=\"Quality sum of reference observations calculated by QR in called samples.\">");
43     variantFile.removeInfoHeaderLine("QA");
44     variantFile.addHeaderLine("##INFO=<ID=QA,Number=A,Type=Float,Description=\"Quality sum of alternate observations calculated by QA in called samples.\">");
45     variantFile.removeInfoHeaderLine("RQA");
46     variantFile.addHeaderLine("##INFO=<ID=RQA,Number=A,Type=Float,Description=\"Ratio of mean alternate observation quality to mean reference observation quality (MQA/MQR).\">");
47 
48     // write the new header
49     cout << variantFile.header << endl;
50 
51     // print the records, filtering is done via the setting of varA's output sample names
52     while (variantFile.getNextVariant(var)) {
53         int refobs = 0;
54         int refqual = 0;
55         vector<int> altobs(var.alt.size(), 0);
56         vector<int> altqual(var.alt.size(), 0);
57         for (map<string, map<string, vector<string> > >::iterator s = var.samples.begin();
58              s != var.samples.end(); ++s) {
59             map<string, vector<string> >& sample = s->second;
60             int x;
61             if (sample.find("RO") != sample.end()) {
62                 convert(sample["RO"].front(), x);
63                 refobs += x;
64                 if (sample.find("QR") != sample.end()) {
65                     convert(sample["QR"].front(), x);
66                     refqual += x;
67                 }
68             }
69             if (sample.find("AO") != sample.end()) {
70                 vector<string>& aos = sample["AO"];
71                 for (int i = 0; i != var.alt.size(); ++i) {
72                     convert(aos[i], x);
73                     altobs[i] += x;
74                 }
75                 if (sample.find("QA") != sample.end()) {
76                     vector<string>& qas = sample["QA"];
77                     for (int i = 0; i != var.alt.size(); ++i) {
78                         convert(qas[i], x);
79                         altqual[i] += x;
80                     }
81                 }
82             }
83         }
84         var.info["QR"].push_back(convert(refqual));
85         if (refobs == 0 || refqual == 0) {
86             var.info["AQR"].push_back(convert(0));
87         } else {
88             var.info["AQR"].push_back(convert((double)refqual/(double)refobs));
89         }
90 
91         for (int i = 0; i != var.alt.size(); ++i) {
92             var.info["QA"].push_back(convert(altqual[i]));
93             var.info["AQA"].push_back(convert((double)altqual[i]/(double)altobs[i]));
94             if (refobs == 0 || refqual == 0) {
95                 var.info["RQA"].push_back(convert(1));
96             } else {
97                 var.info["RQA"].push_back(convert(((double)altqual[i]/(double)altobs[i]) /
98                                                   ((double)refqual/(double)refobs)));
99             }
100         }
101         cout << var << endl;
102     }
103 
104     return 0;
105 
106 }
107 
108