1 /*
2  * bcf_entry.cpp
3  *
4  *  Created on: Sep 20, 2012
5  *      Author: Anthony Marcketta
6  *      ($Revision: 1 $)
7  */
8 
9 #include "bcf_entry.h"
10 
bcf_entry(header & header_obj,vector<bool> & include_individual)11 bcf_entry::bcf_entry(header &header_obj, vector<bool> &include_individual)
12 {
13 	N_indv = header_obj.N_indv;
14 	include_indv = include_individual;
15 	include_genotype = vector<bool>(N_indv, true);
16 	basic_parsed = false; fully_parsed = false;
17 	parsed_ALT = false; parsed_FILTER = false;
18 	parsed_INFO = false; parsed_FORMAT = false;
19 	CHROM = ""; POS = -1; REF = ""; QUAL = -1;
20 	N_INFO_removed = 0; N_FORMAT_removed = 0;
21 	passed_filters = true; parsed_FORMAT_binary = false;
22 	parsed_GT = vector<bool>(N_indv, false); parsed_GQ = vector<bool>(N_indv, false);
23 	parsed_DP = vector<bool>(N_indv, false); parsed_FT = vector<bool>(N_indv, false);
24 	GT_idx = -1; GQ_idx = -1; DP_idx = -1; FT_idx = -1;
25 	N_info = 0; N_format = 0; L_shared = 0; L_indiv = 0; line_pos = 0;
26 	N_allele = 0; INFO_pos = 0; FILTER_pos = 0; ALT_pos = 0; FORMAT_pos = 0;
27 	FORMAT_positions.resize(0); FORMAT_types.resize(0); FORMAT_sizes.resize(0); FORMAT_skip.resize(0); FORMAT_keys.resize(0);
28 	line.clear();
29 
30 	entry_header = header_obj;
31 }
32 
~bcf_entry()33 bcf_entry::~bcf_entry() {}
34 
reset(const vector<char> & data_line)35 void bcf_entry::reset(const vector<char> &data_line)
36 {
37 	basic_parsed = false;
38 	fully_parsed = false;
39 	parsed_ALT = false;
40 	parsed_FILTER = false;
41 	parsed_INFO = false;
42 	parsed_FORMAT = false;
43 	parsed_FORMAT_binary = false;
44 	passed_filters = true;
45 
46 	line = data_line;
47 
48 	fill(parsed_GT.begin(), parsed_GT.end(), false);
49 	fill(parsed_GQ.begin(), parsed_GQ.end(), false);
50 	fill(parsed_DP.begin(), parsed_DP.end(), false);
51 	fill(parsed_FT.begin(), parsed_FT.end(), false);
52 	fill(include_genotype.begin(), include_genotype.end(), true);
53 
54 	INFO_pos = 0; FILTER_pos = 0; ALT_pos = 0; FORMAT_pos = 0;
55 	FORMAT_positions.clear(); FORMAT_types.clear(); FORMAT_sizes.clear(); FORMAT_skip.clear(); FORMAT_keys.clear();
56 	N_INFO_removed = 0; N_FORMAT_removed = 0;
57 }
58 
parse_basic_entry(bool parse_ALT,bool parse_FILTER,bool parse_INFO)59 void bcf_entry::parse_basic_entry(bool parse_ALT, bool parse_FILTER, bool parse_INFO)
60 {
61 	if (line.empty())
62 	{
63 		if (parse_ALT)
64 			set_ALT("");
65 		return;
66 	}
67 
68 	if (!basic_parsed)
69 	{
70 		uint32_t n_allele_info, n_fmt_sample;
71 		uint32_t chrom, pos, rlen;
72 		uint32_t shared, indiv;
73 		float qual;
74 
75 		line_pos = 0;
76 
77 		get_number(shared, &line_pos, line);
78 		get_number(indiv, &line_pos, line);
79 		L_shared = shared;
80 		L_indiv = indiv;
81 
82 		get_number(chrom, &line_pos, line);
83 		get_number(pos, &line_pos, line);
84 		get_number(rlen, &line_pos, line);
85 
86 		qual = *reinterpret_cast<const float*>(&line[line_pos]);
87 		line_pos += sizeof(qual);
88 
89 		get_number(n_allele_info, &line_pos, line);
90 		get_number(n_fmt_sample, &line_pos, line);
91 
92 		N_format = n_fmt_sample >> 24;
93 		CHROM = entry_header.CONTIG_map[chrom].ID;
94 		POS = pos + 1;
95 		ID = get_typed_string( &line_pos, line );
96 		REF = get_typed_string( &line_pos, line );
97 		QUAL = qual;
98 
99 		N_allele = n_allele_info >> 16;
100 		N_info = n_allele_info & (uint32_t)65535;
101 
102 		ALT_pos = line_pos;
103 		for (unsigned int ui=1; ui<N_allele; ui++)
104 			skip_section(&line_pos, line);
105 
106 		FILTER_pos = line_pos;
107 		skip_section(&line_pos, line);
108 
109 		INFO_pos = line_pos;
110 		std::transform(REF.begin(), REF.end(), REF.begin(), ::toupper);
111 
112 		basic_parsed = true;
113 	}
114 	if (parse_ALT && !parsed_ALT)
115 		set_ALT(N_allele);
116 	if (parse_FILTER && !parsed_FILTER)
117 		set_FILTER();
118 	if (parse_INFO && !parsed_INFO)
119 		set_INFO();
120 }
121 
parse_full_entry(bool parse_FORMAT)122 void bcf_entry::parse_full_entry(bool parse_FORMAT)
123 {
124 	if (fully_parsed)
125 		return;
126 
127 	if (basic_parsed == false)
128 		parse_basic_entry();
129 
130 	FORMAT_pos = L_shared + 2*sizeof(uint32_t);
131 	FORMAT_positions.resize(N_format);
132 	FORMAT_types.resize(N_format);
133 	FORMAT_sizes.resize(N_format);
134 	FORMAT_skip.resize(N_format);
135 	FORMAT_keys.resize(N_format);
136 
137 	if (parse_FORMAT)
138 		set_FORMAT();
139 
140 	fully_parsed = true;
141 }
142 
143 // Filter specific genotypes by quality
filter_genotypes_by_quality(double min_genotype_quality)144 void bcf_entry::filter_genotypes_by_quality(double min_genotype_quality)
145 {
146 	if (fully_parsed == false)
147 		parse_full_entry();
148 
149 	if (GQ_idx != -1)
150 	{	// Have quality info
151 		double quality;
152 		for (unsigned int ui=0; ui<N_indv; ui++)
153 		{
154 			if (parsed_GQ[ui] == false)
155 				parse_genotype_entry(ui, false, true);
156 			quality = get_indv_GQUALITY(ui);
157 			if (quality < min_genotype_quality)
158 				include_genotype[ui] = false;
159 		}
160 	}
161 }
162 
163 // Set the include_genotype flag on the basis of depth
filter_genotypes_by_depth(int min_depth,int max_depth)164 void bcf_entry::filter_genotypes_by_depth(int min_depth, int max_depth)
165 {
166 	if (fully_parsed == false)
167 		parse_full_entry();
168 
169 	if (DP_idx != -1)
170 	{	// Have depth info
171 		int depth;
172 		for (unsigned int ui=0; ui<N_indv; ui++)
173 		{
174 			if (parsed_DP[ui] == false)
175 				parse_genotype_entry(ui, false, false, true);
176 			depth = get_indv_DEPTH(ui);
177 			if ((depth < min_depth) || (depth > max_depth))
178 				include_genotype[ui] = false;
179 		}
180 	}
181 }
182 
filter_genotypes_by_filter_status(const set<string> & filter_flags_to_remove,bool remove_all)183 void bcf_entry::filter_genotypes_by_filter_status(const set<string> &filter_flags_to_remove, bool remove_all)
184 {
185 	if (fully_parsed == false)
186 		parse_full_entry();
187 
188 	vector<string> GFILTERs;
189 	if (FT_idx != -1)
190 	{	// Have GFilter info
191 		for (unsigned int ui=0; ui<N_indv; ui++)
192 		{
193 			if (parsed_FT[ui] == false)
194 				parse_genotype_entry(ui, false, false, false, true);
195 			get_indv_GFILTER_vector(ui, GFILTERs);
196 
197 			if (remove_all == true)
198 			{	// If removing all filters, only keep things with label PASS
199 				if (!GFILTERs.empty())
200 					if ((GFILTERs[0] != "PASS") && (GFILTERs[0] != "."))
201 						include_genotype[ui] = false;
202 			}
203 			else
204 			{	// Only removing specific filters
205 				for (unsigned int uj=0; uj<GFILTERs.size(); uj++)
206 					if (filter_flags_to_remove.find(GFILTERs[uj]) != filter_flags_to_remove.end())
207 							include_genotype[ui] = false;
208 			}
209 		}
210 	}
211 }
212 
parse_genotype_entry(unsigned int indv,bool GT,bool GQ,bool DP,bool FT)213 void bcf_entry::parse_genotype_entry(unsigned int indv, bool GT, bool GQ, bool DP, bool FT)
214 {
215 	if (fully_parsed == false)
216 		parse_full_entry(true);
217 
218 	if (parsed_FORMAT == false)
219 		set_FORMAT();
220 
221 	unsigned int l_pos, indv_size, type, size, ui;
222 	static vector<int> ids;
223 	    ids.resize(0);
224 
225 	if ( GT && !parsed_GT[indv] && GT_idx != -1 )
226 		ids.push_back(GT_idx);
227 	if (GQ && !parsed_GQ[indv] && GQ_idx != -1)
228 		ids.push_back(GQ_idx);
229 	if (DP && !parsed_DP[indv] && DP_idx != -1)
230 		ids.push_back(DP_idx);
231 	if (FT && !parsed_FT[indv] && FT_idx != -1)
232 		ids.push_back(FT_idx);
233 
234 	for(unsigned int i=0; i<ids.size(); i++)
235 	{
236 		ui = ids[i];
237 		type = FORMAT_types[ui];
238 		size = FORMAT_sizes[ui];
239 		indv_size = FORMAT_skip[ui];
240 		l_pos = FORMAT_positions[ui] + indv*indv_size;
241 
242 		if ((int)ui == GT_idx)
243 			set_indv_GENOTYPE_and_PHASE(indv, l_pos, size);
244 		else if ((int)ui == GQ_idx)
245 		{
246 			if (size>1)
247 				LOG.error("Error: Only expect single value for QUALITY.\n");
248 
249 			float tmp;
250 			if (type==5)
251 				tmp = *reinterpret_cast<const float*>(&line[l_pos]);
252 			else if (type==1)
253 			{
254 				int8_t tmp2 = *reinterpret_cast<const int8_t*>(&line[l_pos]);
255 				tmp = (float)tmp2;
256 			}
257 			else if (type==2)
258 			{
259 				int16_t tmp2 = *reinterpret_cast<const int16_t*>(&line[l_pos]);
260 				tmp = (float)tmp2;
261 			}
262 			else if (type==3)
263 			{
264 				int32_t tmp2 = *reinterpret_cast<const int32_t*>(&line[l_pos]);
265 				tmp = (float)tmp2;
266 			}
267 			else
268 				LOG.error("Error: Invalid type for QUALITY.\n");
269 
270 			set_indv_GQUALITY(indv, tmp);
271 		}
272 		else if ((int)ui == DP_idx)
273 		{
274 			if (size>1)
275 				LOG.error("Error: Only expect single value for DEPTH.\n");
276 
277 			int tmp = -1;
278 
279 			if (type==1)
280 			{
281 				if ( !check_missing(l_pos, 1, line) )
282 					tmp = *reinterpret_cast<const int8_t*>(&line[l_pos]);
283 			}
284 			else if (type==2)
285 			{
286 				if ( !check_missing(l_pos, 2, line) )
287 					tmp = *reinterpret_cast<const int16_t*>(&line[l_pos]);
288 			}
289 			else if (type==3)
290 			{
291 				if ( !check_missing(l_pos, 3, line) )
292 					tmp = *reinterpret_cast<const int32_t*>(&line[l_pos]);
293 			}
294 			else if (type==5)
295 			{
296 				float tmp2 = -1;
297 				if ( !check_missing(l_pos, 5, line) )
298 					tmp2 = *reinterpret_cast<const float*>(&line[l_pos]);
299 				tmp = (int)tmp2;
300 			}
301 			else
302 				LOG.error("Error: Invalid type for DEPTH.\n");
303 
304 			set_indv_DEPTH(indv, tmp);
305 		}
306 		else if ((int)ui == FT_idx)
307 		{
308 			if (type == 7)
309 			{
310 				vector<char> tmp;
311 				tmp.resize( size*sizeof(char) );
312 				memcpy(&tmp[0], &line[l_pos], size*sizeof(char));
313 				set_indv_GFILTER(indv, tmp);
314 			}
315 			else
316 				LOG.one_off_warning("Warning: FT values must be encoded in string format.\n");
317 		}
318 	}
319 	// Set missing return values if requested a value, but couldn't find it
320 	if (GT && (parsed_GT[indv] == false))
321 	{
322 		set_indv_GENOTYPE_and_PHASE(indv, make_pair(-1,-1), '/');
323 	}
324 	if (GQ && (parsed_GQ[indv] == false))
325 	{
326 		set_indv_GQUALITY(indv, -1);
327 	}
328 	if (DP && (parsed_DP[indv] == false))
329 	{
330 		set_indv_DEPTH(indv, -1);
331 	}
332 	if (FT && (parsed_FT[indv] == false))
333 	{
334 		set_indv_GFILTER(indv, "");
335 	}
336 }
337 
parse_genotype_entries(bool GT,bool GQ,bool DP,bool FT)338 void bcf_entry::parse_genotype_entries(bool GT, bool GQ, bool DP, bool FT)
339 {
340 	for (unsigned int ui=0; ui<N_indv; ui++)
341 		parse_genotype_entry(ui, GT, GQ, DP, FT);
342 }
343 
read_indv_generic_entry(unsigned int indv,const string & FORMAT_id,string & out)344 void bcf_entry::read_indv_generic_entry(unsigned int indv, const string &FORMAT_id, string &out)
345 {
346 	read_indv_generic_entry(indv, FORMAT_to_idx[FORMAT_id], out);
347 }
348 
read_indv_generic_entry(unsigned int indv,const int & idx,string & out)349 void bcf_entry::read_indv_generic_entry(unsigned int indv, const int &idx, string &out)
350 {
351 	if (fully_parsed == false)
352 		parse_full_entry(true);
353 
354 	if (parsed_FORMAT == false)
355 		set_FORMAT();
356 
357 	if(idx == GT_idx && !parsed_GT[indv])
358 		parse_genotype_entry(indv, true);
359 
360 	out = ".";
361 	outstream.str("");
362 
363 	string tmpstr;
364 	unsigned int l_pos, type, size;
365 	bool miss, format_miss;
366 
367 	l_pos = FORMAT_positions[idx] + FORMAT_skip[idx]*indv;
368 	type = FORMAT_types[idx];
369 	size = FORMAT_sizes[idx];
370 
371 	format_miss = true;
372 	if(type == 1)
373 	{
374 		int8_t tmp;
375 		if (idx == GT_idx)
376 		{
377 			pair<int, int> genotype;
378 			char phase;
379 
380 			get_indv_GENOTYPE_ids(indv, genotype);
381 			phase = get_indv_PHASE(indv);
382 			if ((genotype.first == -2) && (genotype.second == -2))
383 				outstream << ".";
384 			else if ((genotype.first == -1) && (genotype.second == -2))
385 				outstream << ".";
386 			else if ((genotype.first > -1) && (genotype.second == -2))
387 				outstream << genotype.first;
388 			else if ((genotype.first > -1) && (genotype.second > -1))
389 				outstream << genotype.first << phase << genotype.second;
390 			else
391 				outstream << genotype.first << phase << genotype.second;
392 
393 			out = outstream.str();
394 		}
395 		else
396 		{
397 			format_miss = true;
398 			for (unsigned int uj=0; uj<size; uj++)
399 			{
400 				if (check_end(l_pos, type, line))
401 					break;
402 
403 				miss = check_missing(l_pos, type, line);
404 				if (uj != 0)
405 					outstream << ",";
406 
407 				if (miss)
408 					outstream << ".";
409 				else
410 				{
411 					tmp = *reinterpret_cast<const int8_t*>(&line[l_pos]);
412 					outstream << int(tmp);
413 				}
414 				l_pos += sizeof(int8_t);
415 				format_miss = format_miss && miss;
416 			}
417 
418 			tmpstr = outstream.str();
419 			if ( (tmpstr.length() > 0) and !format_miss)
420 				out = tmpstr;
421 		}
422 	}
423 	else if (type == 2)
424 	{
425 		int16_t tmp;
426 		format_miss = true;
427 
428 		for (unsigned int uj=0; uj<size; uj++)
429 		{
430 			if (check_end(l_pos, type, line))
431 				break;
432 
433 			miss = check_missing(l_pos, type, line);
434 			if (uj != 0)
435 				outstream << ",";
436 
437 			if (miss)
438 				outstream << ".";
439 			else
440 			{
441 				tmp = *reinterpret_cast<const int16_t*>(&line[l_pos]);
442 				outstream << int(tmp);
443 			}
444 			l_pos += sizeof(int16_t);
445 			format_miss = format_miss && miss;
446 		}
447 
448 		tmpstr = outstream.str();
449 		if ( (tmpstr.length() > 0) and !format_miss )
450 			out = tmpstr;
451 	}
452 	else if (type == 3)
453 	{
454 		int32_t tmp;
455 		format_miss = true;
456 
457 		for (unsigned int uj=0; uj<size; uj++)
458 		{
459 			if (check_end(l_pos, type, line))
460 				break;
461 
462 			miss = check_missing(l_pos, type, line);
463 			if (uj != 0)
464 				outstream << ",";
465 
466 			if (miss)
467 				outstream << ".";
468 			else
469 			{
470 				tmp = *reinterpret_cast<const int32_t*>(&line[l_pos]);
471 				outstream << int(tmp);
472 			}
473 			l_pos += sizeof(int32_t);
474 			format_miss = format_miss && miss;
475 		}
476 
477 		tmpstr = outstream.str();
478 		if ( (tmpstr.length() > 0) and !format_miss )
479 			out = tmpstr;
480 	}
481 	else if (type == 5)
482 	{
483 		float tmp;
484 		format_miss = true;
485 
486 		for (unsigned int uj=0; uj<size; uj++)
487 		{
488 			if (check_end(l_pos, type, line))
489 				break;
490 
491 			miss = check_missing(l_pos, type, line);
492 			if (uj != 0)
493 				outstream << ",";
494 
495 			if (miss)
496 				outstream << ".";
497 			else
498 			{
499 				tmp = *reinterpret_cast<const float*>(&line[l_pos]);
500 				outstream << float(tmp);
501 			}
502 			l_pos += sizeof(float);
503 			format_miss = format_miss && miss;
504 		}
505 
506 		tmpstr = outstream.str();
507 		if ( (tmpstr.length() > 0) and !format_miss )
508 			out = tmpstr;
509 	}
510 	else if (type == 7)
511 	{
512 		stringstream str_stream;
513 		string tmp_string;
514 		char tmp = '.';
515 
516 		for (unsigned int uj=0; uj<size; uj++)
517 		{
518 			tmp = *reinterpret_cast<const char*>(&line[l_pos]);
519 			l_pos += sizeof(char);
520 			str_stream << tmp;
521 		}
522 		tmp_string = str_stream.str();
523 		tmp_string.erase( remove( tmp_string.begin(), tmp_string.end(), ' ' ), tmp_string.end() );
524 
525 		if (tmp_string != "")
526 			out = tmp;
527 		else
528 			out = ".";
529 	}
530 }
531 
read_all_entries(string & out)532 void bcf_entry::read_all_entries(string &out)
533 {
534 	if (fully_parsed == false)
535 		parse_full_entry(true);
536 
537 	if (parsed_FORMAT == false)
538 		set_FORMAT();
539 
540 	ostringstream outstream;
541 	string tmpstr;
542 	outstream.str("");
543 	tmpstream.str("");
544 	bool format_miss, indv_miss;
545 
546 	for(unsigned int ui=0; ui<N_indv; ui++)
547 	{
548 		if(include_indv[ui] == false)
549 			continue;
550 		outstream << "\t";
551 		indv_miss = true;
552 
553 		for(unsigned int uj=0; uj<N_format; uj++)
554 		{
555 			if (uj != 0)
556 				tmpstream << ":";
557 
558 			if ( ((int)uj == GT_idx) && (include_genotype[ui] == false) )
559 			{
560 				tmpstr = ".";
561 				for (int uk=1; uk<ploidy[ui]; uk++)
562 					tmpstr = tmpstr + "/.";
563 				format_miss = false;
564 			}
565 			else if ((int)uj == GT_idx)
566 			{
567 				read_indv_generic_entry(ui, uj, tmpstr);
568 				format_miss = false;
569 			}
570 			else
571 			{
572 				read_indv_generic_entry(ui, uj, tmpstr);
573 				format_miss = (tmpstr == ".");
574 			}
575 			indv_miss = indv_miss && format_miss;
576 			tmpstream << tmpstr;
577 
578 			if (!format_miss)
579 			{
580 				tmpstr = tmpstream.str();
581 				outstream << tmpstr;
582 				tmpstream.str("");
583 			}
584 		}
585 		tmpstream.str("");
586 	}
587 	out = outstream.str();
588 }
589 
590 // Output BCF entry to output stream in VCF format
print(ostream & out,const set<string> & INFO_to_keep,bool keep_all_INFO)591 void bcf_entry::print(ostream &out, const set<string> &INFO_to_keep, bool keep_all_INFO)
592 {
593 	if (fully_parsed == false)
594 		parse_full_entry();
595 
596 	out << get_CHROM() << '\t' << POS << '\t' << get_ID() << '\t' << REF << '\t' << get_ALT();
597 	out << '\t' << header::double2str(QUAL);
598 	out << '\t' << get_FILTER();
599 	out << '\t' << get_INFO(INFO_to_keep, keep_all_INFO);
600 
601 	if (FORMAT.size() > 0)
602 	{
603 		string indv_entries;
604 		out << '\t' << get_FORMAT();
605 
606 		read_all_entries(indv_entries);
607 		out << indv_entries;
608 	}
609 	out << '\n';	// endl flushes the buffer, which is slow. This (should be) quicker.
610 }
611 
612 // Output BCF entry to output stream in BCF format
print_bcf(BGZF * out,const set<string> & INFO_to_keep,bool keep_all_INFO)613 void bcf_entry::print_bcf(BGZF* out, const set<string> &INFO_to_keep, bool keep_all_INFO)
614 {
615 	if (fully_parsed == false)
616 		parse_full_entry(true);
617 
618 	vector<char> out_vector, tmp_vector;
619 	vector<pair< string, string > > tmp_info;
620 	int index;
621 
622 	out_vector.resize(INFO_pos);
623 	memcpy(&out_vector[0], &line[0], INFO_pos);
624 
625 	if (keep_all_INFO)
626 	{
627 		unsigned int curr_size = out_vector.size();
628 		out_vector.resize(curr_size + (FORMAT_pos - INFO_pos) );
629 		memcpy(&out_vector[curr_size], &line[INFO_pos], (FORMAT_pos - INFO_pos));
630 	}
631 	else
632 	{
633 		int map_type, number;
634 		tmp_info = get_INFO_vector(INFO_to_keep, keep_all_INFO);
635 		N_INFO_removed = INFO.size()-tmp_info.size();
636 
637 		get_n_allele_info(tmp_vector);
638 		memcpy(&out_vector[6*sizeof(int32_t)], &tmp_vector[0], sizeof(char));
639 
640 		for(unsigned int ui=0; ui<tmp_info.size(); ui++)
641 		{
642 			tmp_vector.resize(0);
643 			index = entry_header.INFO_reverse_map[ tmp_info[ui].first ];
644 			make_typed_int(tmp_vector, index, true);
645 			out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
646 
647 			tmp_vector.resize(0);
648 			map_type = entry_header.INFO_map[ index ].Type;
649 			number = entry_header.INFO_map[ index ].N_entries;
650 
651 			if (map_type == Integer)
652 				make_typed_int_vector(tmp_vector, tmp_info[ui].second, number );
653 			else if (map_type == Float)
654 				make_typed_float_vector(tmp_vector, tmp_info[ui].second, number );
655 			else if ( (map_type == Character) or (map_type == String) )
656 				make_typed_string(tmp_vector, tmp_info[ui].second, true );
657 			else if (map_type == Flag)
658 				make_typed_int(tmp_vector, 1, true );
659 			else
660 				LOG.error("Invalid type in INFO definition", 0);
661 
662 			out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end());
663 		}
664 	}
665 
666 	uint32_t l_shared = (uint32_t)out_vector.size() - (uint32_t)(2*sizeof(uint32_t));
667 	memcpy(&out_vector[0], &l_shared, sizeof(uint32_t));
668 
669 	unsigned int size, l_pos, type;
670 	for (unsigned int ui=0; ui<N_format; ui++)
671 	{
672 		type = FORMAT_types[ui];
673 		size = FORMAT_sizes[ui];
674 
675 		tmp_vector.resize(0);
676 		make_typed_int(tmp_vector, FORMAT_keys[ui], true);
677 		out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end() );
678 
679 		tmp_vector.resize(0);
680 		make_type_size(tmp_vector, type, size);
681 		out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end() );
682 
683 		for (unsigned int uj=0; uj<N_indv; uj++)
684 		{
685 			if (include_indv[uj] == false)
686 				continue;
687 
688 			tmp_vector.resize(FORMAT_skip[ui]);
689 			l_pos = FORMAT_positions[ui] + FORMAT_skip[ui]*uj;
690 
691 			if ( ((int)uj == GT_idx) and (include_genotype[uj] == false) )
692 				for (unsigned int ploidy = 0; ploidy < FORMAT_skip[ui]; ploidy++)
693 					tmp_vector[ploidy] = (int8_t)0x81;
694 			else
695 				memcpy(&tmp_vector[0], &line[l_pos], FORMAT_skip[ui]);
696 
697 			out_vector.insert(out_vector.end(), tmp_vector.begin(), tmp_vector.end() );
698 		}
699 	}
700 	uint32_t l_indv = (uint32_t)out_vector.size() - l_shared - (uint32_t)(2*sizeof(uint32_t));
701 	memcpy(&out_vector[sizeof(uint32_t)], &l_indv, sizeof(uint32_t));
702 
703 	bgzf_write(out, &out_vector[0], out_vector.size());
704 }
705