1 #include "Variant.h"
2 #include "split.h"
3 #include "Fasta.h"
4 #include <getopt.h>
5
6 using namespace std;
7 using namespace vcflib;
8
printSummary(char ** argv)9 void printSummary(char** argv) {
10 cerr << "usage: " << argv[0] << " [options] <vcf file>" << endl
11 << endl
12 << "options:" << endl
13 << " -f, --fasta-reference FASTA reference file to use to obtain primer sequences" << endl
14 << " -l, --primer-length The length of the primer sequences on each side of the variant" << endl
15 << endl
16 << "For each VCF record, extract the flanking sequences, and write them to stdout as FASTA" << endl
17 << "records suitable for alignment. This tool is intended for use in designing validation" << endl
18 << "experiments. Primers extracted which would flank all of the alleles at multi-allelic" << endl
19 << "sites. The name of the FASTA \"reads\" indicates the VCF record which they apply to." << endl
20 << "The form is >CHROM_POS_LEFT for the 3' primer and >CHROM_POS_RIGHT for the 5' primer," << endl
21 << "for example:" << endl
22 << endl
23 << ">20_233255_LEFT" << endl
24 << "CCATTGTATATATAGACCATAATTTCTTTATCCAATCATCTGTTGATGGA" << endl
25 << ">20_233255_RIGHT" << endl
26 << "ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA" << endl
27 << endl;
28 exit(0);
29 }
30
31
main(int argc,char ** argv)32 int main(int argc, char** argv) {
33
34 int c;
35 string fastaRef;
36 int primerLength = 0;
37
38 if (argc == 1)
39 printSummary(argv);
40
41 while (true) {
42 static struct option long_options[] =
43 {
44 /* These options set a flag. */
45 //{"verbose", no_argument, &verbose_flag, 1},
46 {"help", no_argument, 0, 'h'},
47 {"fasta-reference", required_argument, 0, 'f'},
48 {"primer-length", required_argument, 0, 'l'},
49 //{"length", no_argument, &printLength, true},
50 {0, 0, 0, 0}
51 };
52 /* getopt_long stores the option index here. */
53 int option_index = 0;
54
55 c = getopt_long (argc, argv, "hf:l:",
56 long_options, &option_index);
57
58 /* Detect the end of the options. */
59 if (c == -1)
60 break;
61
62 switch (c)
63 {
64 case 0:
65 /* If this option set a flag, do nothing else now. */
66 if (long_options[option_index].flag != 0)
67 break;
68 printf ("option %s", long_options[option_index].name);
69 if (optarg)
70 printf (" with arg %s", optarg);
71 printf ("\n");
72 break;
73
74 case 'f':
75 fastaRef = optarg;
76 break;
77
78 case 'l':
79 primerLength = atoi(optarg);
80 break;
81
82 case 'h':
83 printSummary(argv);
84 exit(0);
85 break;
86
87 case '?':
88 /* getopt_long already printed an error message. */
89 printSummary(argv);
90 exit(1);
91 break;
92
93 default:
94 abort ();
95 }
96 }
97
98 if (primerLength == 0) {
99 cerr << "a primer length must be specified" << endl;
100 exit(1);
101 }
102 if (fastaRef.empty()) {
103 cerr << "a FASTA reference sequence must be specified" << endl;
104 exit(1);
105 }
106
107 FastaReference ref;
108 ref.open(fastaRef);
109
110 VariantCallFile variantFile;
111 string inputFilename;
112 if (optind == argc - 1) {
113 inputFilename = argv[optind];
114 variantFile.open(inputFilename);
115 } else {
116 variantFile.open(std::cin);
117 }
118
119 if (!variantFile.is_open()) {
120 return 1;
121 }
122
123 Variant var(variantFile);
124 while (variantFile.getNextVariant(var)) {
125 // get the ref start and end positions
126 int refstart = var.position - 1; // convert to 0-based
127 int refend = var.position + var.ref.size() - 1;
128 string leftprimer = ref.getSubSequence(var.sequenceName, refstart - primerLength, primerLength);
129 string rightprimer = ref.getSubSequence(var.sequenceName, refend, primerLength);
130 //cout << var << endl;
131 cout << ">" << var.sequenceName << "_" << var.position << "_LEFT" << endl
132 << leftprimer << endl
133 << ">" << var.sequenceName << "_" << var.position << "_RIGHT" << endl
134 << rightprimer << endl;
135 }
136
137 return 0;
138
139 }
140
141