1 #include "Variant.h"
2 #include "split.h"
3 #include <string>
4 #include <sstream>
5 #include <iostream>
6 #include <getopt.h>
7
8 using namespace std;
9 using namespace vcflib;
10
11
main(int argc,char ** argv)12 int main(int argc, char** argv) {
13
14 if (argc > 1 && (argv[1] == "-h" || argv[1] == "--help")) {
15 cerr << "usage: " << argv[0] << " <[input file] >[output vcf]" << endl
16 << "Adds summary statistics to each record summarizing qualities reported in" << endl
17 << "called genotypes. Uses:" << endl
18 << "RO (reference observation count), QR (quality sum reference observations)" << endl
19 << "AO (alternate observation count), QA (quality sum alternate observations)" << endl;
20 return 1;
21 }
22
23 VariantCallFile variantFile;
24 if (argc == 1) {
25 variantFile.open(std::cin);
26 } else {
27 string filename = argv[argc-1];
28 variantFile.open(filename);
29 if (!variantFile.is_open()) {
30 cerr << "could not open " << filename << endl;
31 return 1;
32 }
33 }
34
35 Variant var(variantFile);
36
37 variantFile.removeInfoHeaderLine("AQR");
38 variantFile.addHeaderLine("##INFO=<ID=AQR,Number=1,Type=Float,Description=\"Mean reference observation quality calculated by RO and QR in called samples.\">");
39 variantFile.removeInfoHeaderLine("AQA");
40 variantFile.addHeaderLine("##INFO=<ID=AQA,Number=A,Type=Float,Description=\"Mean alternate observation quality calculated by AO and QA in called samples.\">");
41 variantFile.removeInfoHeaderLine("QR");
42 variantFile.addHeaderLine("##INFO=<ID=QR,Number=1,Type=Float,Description=\"Quality sum of reference observations calculated by QR in called samples.\">");
43 variantFile.removeInfoHeaderLine("QA");
44 variantFile.addHeaderLine("##INFO=<ID=QA,Number=A,Type=Float,Description=\"Quality sum of alternate observations calculated by QA in called samples.\">");
45 variantFile.removeInfoHeaderLine("RQA");
46 variantFile.addHeaderLine("##INFO=<ID=RQA,Number=A,Type=Float,Description=\"Ratio of mean alternate observation quality to mean reference observation quality (MQA/MQR).\">");
47
48 // write the new header
49 cout << variantFile.header << endl;
50
51 // print the records, filtering is done via the setting of varA's output sample names
52 while (variantFile.getNextVariant(var)) {
53 int refobs = 0;
54 int refqual = 0;
55 vector<int> altobs(var.alt.size(), 0);
56 vector<int> altqual(var.alt.size(), 0);
57 for (map<string, map<string, vector<string> > >::iterator s = var.samples.begin();
58 s != var.samples.end(); ++s) {
59 map<string, vector<string> >& sample = s->second;
60 int x;
61 if (sample.find("RO") != sample.end()) {
62 convert(sample["RO"].front(), x);
63 refobs += x;
64 if (sample.find("QR") != sample.end()) {
65 convert(sample["QR"].front(), x);
66 refqual += x;
67 }
68 }
69 if (sample.find("AO") != sample.end()) {
70 vector<string>& aos = sample["AO"];
71 for (int i = 0; i != var.alt.size(); ++i) {
72 convert(aos[i], x);
73 altobs[i] += x;
74 }
75 if (sample.find("QA") != sample.end()) {
76 vector<string>& qas = sample["QA"];
77 for (int i = 0; i != var.alt.size(); ++i) {
78 convert(qas[i], x);
79 altqual[i] += x;
80 }
81 }
82 }
83 }
84 var.info["QR"].push_back(convert(refqual));
85 if (refobs == 0 || refqual == 0) {
86 var.info["AQR"].push_back(convert(0));
87 } else {
88 var.info["AQR"].push_back(convert((double)refqual/(double)refobs));
89 }
90
91 for (int i = 0; i != var.alt.size(); ++i) {
92 var.info["QA"].push_back(convert(altqual[i]));
93 var.info["AQA"].push_back(convert((double)altqual[i]/(double)altobs[i]));
94 if (refobs == 0 || refqual == 0) {
95 var.info["RQA"].push_back(convert(1));
96 } else {
97 var.info["RQA"].push_back(convert(((double)altqual[i]/(double)altobs[i]) /
98 ((double)refqual/(double)refobs)));
99 }
100 }
101 cout << var << endl;
102 }
103
104 return 0;
105
106 }
107
108