1 /*
2 * vcf_entry.cpp
3 *
4 * Created on: Aug 19, 2009
5 * Author: Adam Auton
6 * ($Revision: 230 $)
7 */
8
9 #include "vcf_entry.h"
10
11 string vcf_entry::convert_line;
12
vcf_entry(header & meta_data,vector<bool> & include_individual)13 vcf_entry::vcf_entry(header &meta_data, vector<bool> &include_individual)
14 {
15 N_indv = meta_data.N_indv;
16 include_indv = include_individual;
17 include_genotype = vector<bool>(N_indv, true);
18 basic_parsed = false; fully_parsed = false;
19 parsed_ALT = false; parsed_FILTER = false;
20 parsed_INFO = false; parsed_FORMAT = false;
21 CHROM = ""; POS = -1; REF = ""; QUAL = -1;
22 passed_filters = true; parsed_FORMAT_binary = false;
23 N_INFO_removed = 0; N_FORMAT_removed = 0;
24 parsed_GT = vector<bool>(N_indv, false); parsed_GQ = vector<bool>(N_indv, false);
25 parsed_DP = vector<bool>(N_indv, false); parsed_FT = vector<bool>(N_indv, false);
26 GT_idx = -1; GQ_idx = -1; DP_idx = -1; FT_idx = -1;
27 FORMAT_positions.resize(N_indv); FORMAT_types.resize(N_indv); FORMAT_sizes.resize(N_indv); FORMAT_skip.resize(N_indv); FORMAT_keys.resize(N_indv);
28 convert_line.clear();
29 data_stream.str("");
30 entry_header = meta_data;
31 }
32
~vcf_entry()33 vcf_entry::~vcf_entry() {}
34
35 // Reset the VCF entry object with a new data line
reset(const vector<char> & data_line)36 void vcf_entry::reset(const vector<char> &data_line)
37 {
38 basic_parsed = false;
39 fully_parsed = false;
40 parsed_ALT = false;
41 parsed_FILTER = false;
42 parsed_INFO = false;
43 parsed_FORMAT = false;
44 parsed_FORMAT_binary = false;
45 passed_filters = true;
46
47 data_stream.clear();
48 line = data_line;
49
50 convert_line.assign(line.begin(), line.end());
51 data_stream.str(convert_line);
52
53 fill(parsed_GT.begin(), parsed_GT.end(), 0);
54 fill(parsed_GQ.begin(), parsed_GQ.end(), 0);
55 fill(parsed_DP.begin(), parsed_DP.end(), 0);
56 fill(parsed_FT.begin(), parsed_FT.end(), 0);
57 fill(include_genotype.begin(), include_genotype.end(), 1);
58
59 N_INFO_removed = 0; N_FORMAT_removed = 0;
60 FORMAT_positions.clear(); FORMAT_types.clear(); FORMAT_sizes.clear(); FORMAT_skip.clear(); FORMAT_keys.clear();
61 }
62
63 // Tokenize the basic information in a VCF data line (at the tab level)
parse_basic_entry(bool parse_ALT,bool parse_FILTER,bool parse_INFO)64 void vcf_entry::parse_basic_entry(bool parse_ALT, bool parse_FILTER, bool parse_INFO)
65 {
66 if(!basic_parsed)
67 {
68 getline(data_stream, CHROM, '\t');
69
70 getline(data_stream, ID, '\t');
71 POS = atoi(ID.c_str());
72 getline(data_stream, ID, '\t');
73 getline(data_stream, REF, '\t');
74 getline(data_stream, ALT_str, '\t');
75 getline(data_stream, QUAL_str, '\t');
76 getline(data_stream, FILTER_str, '\t');
77 getline(data_stream, INFO_str, '\t');
78
79 QUAL = header::str2double(QUAL_str);
80
81 // Convert to uppercase for consistency
82 // Note that VCF v4.1 allows mixtures of lower/upper case in REF and ALT.
83 // However, the spec specifically states that tools using VCF are not required
84 // to preserve the case.
85 std::transform(REF.begin(), REF.end(), REF.begin(), ::toupper);
86 std::transform(ALT_str.begin(), ALT_str.end(),ALT_str.begin(), ::toupper);
87 }
88 basic_parsed = true;
89
90 if (parse_ALT && !parsed_ALT)
91 set_ALT(ALT_str);
92 if (parse_FILTER && !parsed_FILTER)
93 set_FILTER(FILTER_str);
94 if (parse_INFO && !parsed_INFO)
95 set_INFO(INFO_str);
96 }
97
98 // Tokenize the genotype information (at the 'tab' level) in the VCF entry
parse_full_entry(bool parse_FORMAT)99 void vcf_entry::parse_full_entry(bool parse_FORMAT)
100 {
101 if (fully_parsed)
102 return;
103
104 if (basic_parsed == false)
105 parse_basic_entry();
106
107 getline(data_stream, FORMAT_str, '\t');
108
109 if (parse_FORMAT)
110 set_FORMAT(FORMAT_str);
111
112 string tmpstr; tmpstr.reserve(64);
113 GENOTYPE_str.resize(N_indv, tmpstr);
114
115 for (unsigned int ui=0; ui<N_indv; ui++)
116 getline(data_stream, GENOTYPE_str[ui], '\t');
117
118 fully_parsed = true;
119 }
120
121 // Tokenize a given genotype entry into it's component parts
parse_genotype_entry(unsigned int indv,bool GT,bool GQ,bool DP,bool FT)122 void vcf_entry::parse_genotype_entry(unsigned int indv, bool GT, bool GQ, bool DP, bool FT)
123 {
124 if (fully_parsed == false)
125 parse_full_entry(true);
126
127 if (parsed_FORMAT == false)
128 set_FORMAT(FORMAT_str);
129
130 static string tmpstr;
131 int N_required = GT + GQ + DP + FT;
132 int N_got = 0;
133
134 int i=0, start = 0, end = 0;
135 while ((end = GENOTYPE_str[indv].find(':',start)) != string::npos)
136 {
137 if (GT && (i == GT_idx))
138 {
139 tmpstr = GENOTYPE_str[indv].substr(start, end - start);
140 set_indv_GENOTYPE_and_PHASE(indv, tmpstr);
141 N_got++;
142 }
143 else if (GQ && (i == GQ_idx))
144 {
145 tmpstr = GENOTYPE_str[indv].substr(start, end - start);
146 set_indv_GQUALITY(indv, header::str2double(tmpstr));
147 N_got++;
148 }
149 else if (DP && (i == DP_idx))
150 {
151 tmpstr = GENOTYPE_str[indv].substr(start, end - start);
152 set_indv_DEPTH(indv, header::str2int(tmpstr));
153 N_got++;
154 }
155 else if (FT && (i == FT_idx))
156 {
157 tmpstr = GENOTYPE_str[indv].substr(start, end - start);
158 set_indv_GFILTER(indv, tmpstr);
159 N_got++;
160 }
161
162 if (N_got == N_required)
163 break;
164 i++;
165 start = end + 1;
166 }
167 if (N_got != N_required)
168 {
169 if (GT && (i == GT_idx))
170 {
171 tmpstr = GENOTYPE_str[indv].substr(start);
172 set_indv_GENOTYPE_and_PHASE(indv, tmpstr);
173 N_got++;
174 }
175 else if (GQ && (i == GQ_idx))
176 {
177 tmpstr = GENOTYPE_str[indv].substr(start);
178 set_indv_GQUALITY(indv, header::str2double(tmpstr));
179 N_got++;
180 }
181 else if (DP && (i == DP_idx))
182 {
183 tmpstr = GENOTYPE_str[indv].substr(start);
184 set_indv_DEPTH(indv, header::str2int(tmpstr));
185 N_got++;
186 }
187 else if (FT && (i == FT_idx))
188 {
189 tmpstr = GENOTYPE_str[indv].substr(start);
190 set_indv_GFILTER(indv, tmpstr);
191 N_got++;
192 }
193 }
194
195 // Set missing return values if requested a value, but couldn't find it
196 if (GT && (parsed_GT[indv] == false))
197 {
198 set_indv_GENOTYPE_and_PHASE(indv, make_pair(-1,-1), '/');
199 }
200 if (GQ && (parsed_GQ[indv] == false))
201 {
202 set_indv_GQUALITY(indv, -1);
203 }
204 if (DP && (parsed_DP[indv] == false))
205 {
206 set_indv_DEPTH(indv, -1);
207 }
208 if (FT && (parsed_FT[indv] == false))
209 {
210 set_indv_GFILTER(indv, "");
211 }
212 }
213
214 // Read the VCF entry and fully populate the object
parse_genotype_entries(bool GT,bool GQ,bool DP,bool FT)215 void vcf_entry::parse_genotype_entries(bool GT, bool GQ, bool DP, bool FT)
216 {
217 for (unsigned int ui=0; ui<N_indv; ui++)
218 parse_genotype_entry(ui, GT, GQ, DP, FT);
219 }
220
parse_FORMAT()221 void vcf_entry::parse_FORMAT()
222 {
223 FORMAT_binary.resize(0);
224 vector<char> tmp_vector;
225 vector<string> tmp_split;
226 vector< vector<string> > format_matrix(N_indv);
227
228 unsigned int type, number, size, position=0;
229 tmp_split.resize(FORMAT.size());
230 for (unsigned int ui=0; ui<N_indv; ui++)
231 {
232 format_matrix[ui].resize(FORMAT.size());
233 if ( (GENOTYPE_str[ui] != "") and (GENOTYPE_str[ui] != ".") )
234 header::split(GENOTYPE_str[ui],':', tmp_split);
235 else
236 tmp_split.assign(FORMAT.size(),".");
237
238 for (unsigned int uj=0; uj<tmp_split.size(); uj++)
239 format_matrix[ui][uj] = tmp_split[uj];
240
241 for (unsigned int uj=format_matrix[ui].size(); uj<FORMAT.size(); uj++)
242 format_matrix[ui][uj] = ".";
243 }
244
245 FORMAT_positions.resize(FORMAT.size()); FORMAT_types.resize(FORMAT.size()); FORMAT_skip.resize(FORMAT.size());
246
247 for (unsigned int ui=0; ui<FORMAT.size(); ui++)
248 {
249 if (entry_header.FORMAT_reverse_map.find(FORMAT[ui]) == entry_header.FORMAT_reverse_map.end())
250 {
251 LOG.one_off_warning("FORMAT value " + FORMAT[ui] + " is not defined in the header and will not be encoded.");
252 N_FORMAT_removed++;
253 continue;
254 }
255 FORMAT_positions[ui] = position;
256 make_typed_int(tmp_vector, entry_header.FORMAT_reverse_map[ FORMAT[ui] ], true );
257 FORMAT_binary.insert(FORMAT_binary.end(), tmp_vector.begin(), tmp_vector.end() );
258
259 tmp_vector.resize(0);
260 tmp_split.resize(0);
261
262 type = entry_header.FORMAT_map[ entry_header.FORMAT_reverse_map[FORMAT[ui]] ].Type;
263 number = entry_header.FORMAT_map[ entry_header.FORMAT_reverse_map[FORMAT[ui]] ].N_entries;
264
265 for (unsigned int uj=0; uj<N_indv; uj++)
266 tmp_split.push_back( format_matrix[uj][ui] );
267
268 if ((int)ui == GT_idx)
269 make_typed_GT_vector(tmp_vector, tmp_split );
270 else if (type == Integer)
271 make_typed_int_vector(tmp_vector, tmp_split, number );
272 else if (type == Float)
273 make_typed_float_vector(tmp_vector, tmp_split, number );
274 else if ( (type == Character) or (type == String) )
275 make_typed_string_vector(tmp_vector, tmp_split, number );
276 else
277 LOG.error("Invalid type in FORMAT definition", 0);
278
279 position = 0;
280 get_type(&position, tmp_vector, type, size);
281 FORMAT_types[ui] = type;
282
283 if ( (type == 1) || (type == 7) )
284 FORMAT_skip[ui] = size*sizeof(int8_t);
285 else if (type == 2)
286 FORMAT_skip[ui] = size*sizeof(int16_t);
287 else if ( (type == 3) || (type == 5) )
288 FORMAT_skip[ui] = size*sizeof(int32_t);
289
290 FORMAT_binary.insert(FORMAT_binary.end(), tmp_vector.begin(), tmp_vector.end() );
291 tmp_vector.resize(0);
292 position = FORMAT_binary.size();
293 }
294 parsed_FORMAT_binary = true;
295 }
296
297 // Output VCF entry to output stream
print(ostream & out,const set<string> & INFO_to_keep,bool keep_all_INFO)298 void vcf_entry::print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO)
299 {
300 if (fully_parsed == false)
301 parse_full_entry();
302
303 out << get_CHROM() << '\t' << POS << '\t' << get_ID() << '\t' << REF << '\t' << get_ALT();
304
305 out << '\t' << header::double2str(QUAL);
306 out << '\t' << get_FILTER();
307
308 if (keep_all_INFO == false)
309 out << '\t' << get_INFO(INFO_to_keep);
310 else
311 out << '\t' << INFO_str;
312
313 pair<int, int> genotype;
314 string GFILTER_tmp;
315 if (FORMAT.size() > 0)
316 {
317 char PHASE;
318 out << '\t' << get_FORMAT();
319
320 for (unsigned int ui=0; ui<N_indv; ui++)
321 {
322 if (include_indv[ui] == false)
323 continue;
324 out << '\t';
325 for (int count=0; count<(int)FORMAT.size(); count++)
326 {
327 if (count == GT_idx) // (FORMAT[count] == "GT")
328 {
329 if (count != 0) out << ':';
330 if (include_genotype[ui] == true)
331 {
332 get_indv_GENOTYPE_ids(ui, genotype);
333 PHASE = get_indv_PHASE(ui);
334 if ((genotype.first != -1) && (genotype.second != -1))
335 out << header::int2str(genotype.first) << PHASE << header::int2str(genotype.second);
336 else if ((PHASE == '|') && (genotype.second == -1))
337 out << header::int2str(genotype.first); // Handle haploid case
338 else
339 out << header::int2str(genotype.first) << PHASE << header::int2str(genotype.second);
340 }
341 else
342 out << "./.";
343 }
344 else if (count == GQ_idx) //(FORMAT[count] == "GQ")
345 {
346 if (count != 0) out << ':';
347 out << header::double2str(get_indv_GQUALITY(ui));
348 }
349 else if (count == DP_idx) // (FORMAT[count] == "DP")
350 {
351 if (count != 0) out << ':';
352 out << header::int2str(get_indv_DEPTH(ui));
353 }
354 else if (count == FT_idx) // (FORMAT[count] == "FT")
355 {
356 if (count != 0) out << ':';
357 get_indv_GFILTER(ui, GFILTER_tmp);
358 out << GFILTER_tmp;
359 }
360 else
361 { // Unknown FORMAT so just replicate original output
362 if (count != 0) out << ':';
363 read_indv_generic_entry(ui, FORMAT[count], GFILTER_tmp);
364 out << GFILTER_tmp;
365 }
366 }
367 }
368 }
369 out << '\n';
370 }
371
372 // Output VCF entry to output stream in binary
print_bcf(BGZF * out,const set<string> & INFO_to_keep,bool keep_all_INFO)373 void vcf_entry::print_bcf(BGZF* out, const set<string> &INFO_to_keep, bool keep_all_INFO)
374 {
375 if (fully_parsed == false)
376 parse_full_entry();
377
378 if (parsed_FORMAT_binary == false)
379 parse_FORMAT();
380
381 vector<char> out_vector, tmp_vector;
382 out_vector.resize(8*sizeof(int32_t));
383 int vector_pos = 2*sizeof(uint32_t);
384 string tmp_string;
385 int index;
386 vector<string> filter_vector;
387 vector<pair< string, string > > tmp_info;
388
389 tmp_string = get_CHROM();
390 if (tmp_string == "." or tmp_string == " " or tmp_string == "")
391 LOG.error("CHROM value must be defined for all entries.",0);
392
393 if (entry_header.CONTIG_reverse_map.find(tmp_string) == entry_header.CONTIG_reverse_map.end() )
394 LOG.error("CHROM value " + tmp_string + " is not defined on contig dictionary.",0);
395
396 int32_t chrom = (int32_t)entry_header.CONTIG_reverse_map[tmp_string];
397 memcpy(&out_vector[vector_pos], &chrom, sizeof(chrom));
398 vector_pos += sizeof(chrom);
399
400 get_POS_binary(tmp_vector);
401 memcpy(&out_vector[vector_pos], &tmp_vector[0], tmp_vector.size());
402 vector_pos += tmp_vector.size();
403 tmp_vector.resize(0);
404
405 get_rlen(tmp_vector);
406 memcpy(&out_vector[vector_pos], &tmp_vector[0], tmp_vector.size());
407 vector_pos += tmp_vector.size();
408 tmp_vector.resize(0);
409
410 get_QUAL_binary(tmp_vector);
411 memcpy(&out_vector[vector_pos], &tmp_vector[0], tmp_vector.size());
412 vector_pos += tmp_vector.size();
413 tmp_vector.resize(0);
414
415 get_ID_binary(tmp_vector);
416 out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
417 tmp_vector.resize(0);
418
419 get_ALLELES_binary(tmp_vector);
420 out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
421 tmp_vector.resize(0);
422
423 get_FILTER_vector(filter_vector);
424 if (filter_vector.empty())
425 make_typed_int_vector(tmp_vector, filter_vector);
426 else
427 {
428 vector<int> index_vector;
429 for(unsigned int ui=0; ui<filter_vector.size(); ui++)
430 {
431 if ( entry_header.FILTER_reverse_map.find( filter_vector[ui] ) == entry_header.FILTER_reverse_map.end() )
432 LOG.one_off_warning("FILTER value " + filter_vector[ui] + " is not defined in the header and will not be encoded.");
433 else
434 index_vector.push_back( entry_header.FILTER_reverse_map[ filter_vector[ui] ] );
435 }
436 make_typed_int_vector(tmp_vector, index_vector );
437 }
438
439 if (tmp_vector.empty())
440 make_typed_int(tmp_vector, 0, true);
441
442 out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
443
444 int map_type, number;
445 tmp_info = get_INFO_vector(INFO_to_keep, keep_all_INFO);
446
447 for(unsigned int ui=0; ui<tmp_info.size(); ui++)
448 {
449 if (entry_header.INFO_reverse_map.find(tmp_info[ui].first) == entry_header.INFO_reverse_map.end())
450 {
451 LOG.one_off_warning("INFO value " + tmp_info[ui].first + " is not defined in the header and will not be encoded.");
452 N_INFO_removed++;
453 continue;
454 }
455
456 tmp_vector.resize(0);
457 index = entry_header.INFO_reverse_map[ tmp_info[ui].first ];
458 make_typed_int(tmp_vector, index, true);
459 out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
460
461 tmp_vector.resize(0);
462 map_type = entry_header.INFO_map[ index ].Type;
463 number = entry_header.INFO_map[ index ].N_entries;
464
465 if (map_type == Integer)
466 make_typed_int_vector(tmp_vector, tmp_info[ui].second, number );
467 else if (map_type == Float)
468 make_typed_float_vector(tmp_vector, tmp_info[ui].second, number );
469 else if ( (map_type == Character) or (map_type == String) )
470 make_typed_string(tmp_vector, tmp_info[ui].second, true );
471 else if (map_type == Flag)
472 make_typed_int(tmp_vector, 1, true );
473 else
474 LOG.error("Invalid type in INFO definition", 0);
475
476 out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
477 }
478 tmp_vector.resize(0);
479 uint32_t l_shared = (uint32_t)out_vector.size() - (uint32_t)(2*sizeof(uint32_t));
480 memcpy(&out_vector[0], &l_shared, sizeof(l_shared));
481
482 tmp_vector.resize(0);
483 get_n_allele_info(tmp_vector);
484 memcpy(&out_vector[vector_pos], &tmp_vector[0], tmp_vector.size());
485 vector_pos += tmp_vector.size();
486
487 tmp_vector.resize(0);
488 get_FORMAT_binary(tmp_vector);
489 unsigned int key_size, size, type, old_pos, out_size, format_pos = 0;
490 unsigned int skip_size = 0;
491 for (unsigned int ui=0; ui<(FORMAT.size()-N_FORMAT_removed); ui++)
492 {
493 format_pos = FORMAT_positions[ui];
494 old_pos = format_pos;
495
496 get_typed_int(&format_pos, tmp_vector, type, size);
497 get_type(&format_pos, tmp_vector, type, size);
498 key_size = format_pos - old_pos;
499 out_size = out_vector.size();
500 out_vector.resize( out_size + key_size );
501 memcpy(&out_vector[out_size], &tmp_vector[old_pos], key_size );
502
503 skip_size = FORMAT_skip[ui];
504 for (unsigned int uj=0; uj<N_indv; uj++)
505 {
506 if (include_indv[uj] == false)
507 continue;
508
509 format_pos = FORMAT_positions[ui]+key_size+skip_size*uj;
510 out_size = out_vector.size();
511 out_vector.resize( out_size + skip_size );
512 if ( ((int)ui == GT_idx) and (include_genotype[ui] == false) )
513 {
514 for (int p = 0; p < (int)skip_size; p++)
515 {
516 if (p < ploidy[uj])
517 out_vector[out_size] = (int8_t)0x80;
518 else
519 out_vector[out_size] = (int8_t)0x81;
520 out_size++;
521 }
522 }
523 else
524 {
525 memcpy(&out_vector[out_size], &tmp_vector[format_pos], skip_size);
526 out_size += skip_size;
527 }
528 }
529 }
530 tmp_vector.resize(0);
531 get_n_fmt_sample(tmp_vector);
532 memcpy(&out_vector[vector_pos], &tmp_vector[0], tmp_vector.size());
533
534 uint32_t l_indv = (uint32_t)out_vector.size() - l_shared - (uint32_t)(2*sizeof(uint32_t));
535 memcpy(&out_vector[sizeof(l_shared)], &l_indv, sizeof(l_indv));
536 bgzf_write(out, &out_vector[0], out_vector.size());
537 }
538
539 // Set the include_genotype flag on the basis of depth
filter_genotypes_by_depth(int min_depth,int max_depth)540 void vcf_entry::filter_genotypes_by_depth(int min_depth, int max_depth)
541 {
542 if (fully_parsed == false)
543 parse_full_entry();
544
545 if (DP_idx != -1)
546 { // Have depth info
547 int depth;
548 for (unsigned int ui=0; ui<N_indv; ui++)
549 {
550 if (parsed_DP[ui] == false)
551 parse_genotype_entry(ui, false, false, true);
552 depth = get_indv_DEPTH(ui);
553 if ((depth < min_depth) || (depth > max_depth))
554 include_genotype[ui] = false;
555 }
556 }
557 }
558
559 // Filter specific genotypes by quality
filter_genotypes_by_quality(double min_genotype_quality)560 void vcf_entry::filter_genotypes_by_quality(double min_genotype_quality)
561 {
562 if (fully_parsed == false)
563 parse_full_entry();
564
565 if (GQ_idx != -1)
566 { // Have quality info
567 double quality;
568 for (unsigned int ui=0; ui<N_indv; ui++)
569 {
570 if (parsed_GQ[ui] == false)
571 parse_genotype_entry(ui, false, true);
572 quality = get_indv_GQUALITY(ui);
573 if (quality < min_genotype_quality)
574 include_genotype[ui] = false;
575 }
576 }
577 }
578
579 // Exclude genotypes with a filter flag.
filter_genotypes_by_filter_status(const set<string> & filter_flags_to_remove,bool remove_all)580 void vcf_entry::filter_genotypes_by_filter_status(const set<string> &filter_flags_to_remove, bool remove_all)
581 {
582 if (fully_parsed == false)
583 parse_full_entry();
584
585 vector<string> GFILTERs;
586 if (FT_idx != -1)
587 { // Have GFilter info
588 for (unsigned int ui=0; ui<N_indv; ui++)
589 {
590 if (parsed_FT[ui] == false)
591 parse_genotype_entry(ui, false, false, false, true);
592 get_indv_GFILTER_vector(ui, GFILTERs);
593
594 if (remove_all == true)
595 { // If removing all filters, only keep things with label PASS
596 if (!GFILTERs.empty())
597 if ((GFILTERs[0] != "PASS") && (GFILTERs[0] != "."))
598 include_genotype[ui] = false;
599 }
600 else
601 { // Only removing specific filters
602 for (unsigned int uj=0; uj<GFILTERs.size(); uj++)
603 if (filter_flags_to_remove.find(GFILTERs[uj]) != filter_flags_to_remove.end())
604 include_genotype[ui] = false;
605 }
606 }
607 }
608 }
609
read_indv_generic_entry(unsigned int indv,const string & FORMAT_id,string & out)610 void vcf_entry::read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out)
611 {
612 if (fully_parsed == false)
613 parse_full_entry(true);
614
615 if (parsed_FORMAT == false)
616 set_FORMAT(FORMAT_str);
617
618 out = ".";
619
620 if (FORMAT_to_idx.find(FORMAT_id) != FORMAT_to_idx.end())
621 {
622 unsigned int idx = FORMAT_to_idx[FORMAT_id];
623 static string tmpstr;
624 static istringstream ss;
625 ss.clear();
626 ss.str(GENOTYPE_str[indv]);
627
628 for (unsigned int ui=0; ui <= idx; ui++)
629 {
630 getline(ss, tmpstr, ':');
631 if (ui == idx)
632 {
633 out = tmpstr;
634 break;
635 }
636 if (!ss.good())
637 break;
638 }
639 }
640 }
641