1 #include "Variant.h"
2 #include "split.h"
3 #include "Fasta.h"
4 #include <getopt.h>
5 
6 using namespace std;
7 using namespace vcflib;
8 
printSummary(char ** argv)9 void printSummary(char** argv) {
10     cerr << "usage: " << argv[0] << " [options] <vcf file>" << endl
11          << endl
12          << "options:" << endl
13          << "    -f, --fasta-reference  FASTA reference file to use to obtain primer sequences" << endl
14          << "    -l, --primer-length    The length of the primer sequences on each side of the variant" << endl
15          << endl
16          << "For each VCF record, extract the flanking sequences, and write them to stdout as FASTA" << endl
17          << "records suitable for alignment.  This tool is intended for use in designing validation" << endl
18          << "experiments.  Primers extracted which would flank all of the alleles at multi-allelic" << endl
19          << "sites.  The name of the FASTA \"reads\" indicates the VCF record which they apply to." << endl
20          << "The form is >CHROM_POS_LEFT for the 3' primer and >CHROM_POS_RIGHT for the 5' primer," << endl
21          << "for example:" << endl
22          << endl
23          << ">20_233255_LEFT" << endl
24          << "CCATTGTATATATAGACCATAATTTCTTTATCCAATCATCTGTTGATGGA" << endl
25          << ">20_233255_RIGHT" << endl
26          << "ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA" << endl
27          << endl;
28     exit(0);
29 }
30 
31 
main(int argc,char ** argv)32 int main(int argc, char** argv) {
33 
34     int c;
35     string fastaRef;
36     int primerLength = 0;
37 
38     if (argc == 1)
39         printSummary(argv);
40 
41     while (true) {
42         static struct option long_options[] =
43         {
44             /* These options set a flag. */
45             //{"verbose", no_argument,       &verbose_flag, 1},
46             {"help", no_argument, 0, 'h'},
47             {"fasta-reference",  required_argument, 0, 'f'},
48             {"primer-length", required_argument, 0, 'l'},
49             //{"length",  no_argument, &printLength, true},
50             {0, 0, 0, 0}
51         };
52         /* getopt_long stores the option index here. */
53         int option_index = 0;
54 
55         c = getopt_long (argc, argv, "hf:l:",
56                          long_options, &option_index);
57 
58       /* Detect the end of the options. */
59           if (c == -1)
60             break;
61 
62           switch (c)
63             {
64             case 0:
65             /* If this option set a flag, do nothing else now. */
66             if (long_options[option_index].flag != 0)
67               break;
68             printf ("option %s", long_options[option_index].name);
69             if (optarg)
70               printf (" with arg %s", optarg);
71             printf ("\n");
72             break;
73 
74           case 'f':
75             fastaRef = optarg;
76             break;
77 
78           case 'l':
79             primerLength = atoi(optarg);
80             break;
81 
82           case 'h':
83             printSummary(argv);
84             exit(0);
85             break;
86 
87           case '?':
88             /* getopt_long already printed an error message. */
89             printSummary(argv);
90             exit(1);
91             break;
92 
93           default:
94             abort ();
95           }
96       }
97 
98     if (primerLength == 0) {
99         cerr << "a primer length must be specified" << endl;
100         exit(1);
101     }
102     if (fastaRef.empty()) {
103         cerr << "a FASTA reference sequence must be specified" << endl;
104         exit(1);
105     }
106 
107     FastaReference ref;
108     ref.open(fastaRef);
109 
110     VariantCallFile variantFile;
111     string inputFilename;
112     if (optind == argc - 1) {
113         inputFilename = argv[optind];
114         variantFile.open(inputFilename);
115     } else {
116         variantFile.open(std::cin);
117     }
118 
119     if (!variantFile.is_open()) {
120         return 1;
121     }
122 
123     Variant var(variantFile);
124     while (variantFile.getNextVariant(var)) {
125         // get the ref start and end positions
126         int refstart = var.position - 1; // convert to 0-based
127         int refend = var.position + var.ref.size() - 1;
128         string leftprimer = ref.getSubSequence(var.sequenceName, refstart - primerLength, primerLength);
129         string rightprimer = ref.getSubSequence(var.sequenceName, refend, primerLength);
130         //cout << var << endl;
131         cout << ">" << var.sequenceName << "_" << var.position << "_LEFT" << endl
132              << leftprimer << endl
133              << ">" << var.sequenceName << "_" << var.position << "_RIGHT" << endl
134              << rightprimer << endl;
135     }
136 
137     return 0;
138 
139 }
140 
141