1 /* The MIT License
2 
3    Copyright (c) 2016-2021 Genome Research Ltd.
4 
5    Author: Petr Danecek <pd3@sanger.ac.uk>
6 
7    Permission is hereby granted, free of charge, to any person obtaining a copy
8    of this software and associated documentation files (the "Software"), to deal
9    in the Software without restriction, including without limitation the rights
10    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11    copies of the Software, and to permit persons to whom the Software is
12    furnished to do so, subject to the following conditions:
13 
14    The above copyright notice and this permission notice shall be included in
15    all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23    THE SOFTWARE.
24 
25  */
26 
27 #include <assert.h>
28 #include <strings.h>
29 #include <htslib/vcf.h>
30 #include <htslib/vcfutils.h>
31 #include <htslib/hts_os.h>
32 #include "bcftools.h"
33 #include "vcfbuf.h"
34 #include "rbuf.h"
35 
36 typedef struct
37 {
38     double max[VCFBUF_LD_N];
39     int rand_missing, filter1;
40 }
41 ld_t;
42 
43 typedef struct
44 {
45     bcf1_t *rec;
46     double af;
47     int af_set:1, filter:1, idx:30;
48 }
49 vcfrec_t;
50 
51 #define PRUNE_MODE_MAX_AF 1
52 #define PRUNE_MODE_1ST    2
53 #define PRUNE_MODE_RAND   3
54 typedef struct
55 {
56     int max_sites, mvrec, mac, mfarr, mode;
57     int *ac, *idx;
58     float *farr;
59     char *af_tag;
60     vcfrec_t **vrec;
61 }
62 prune_t;
63 
64 typedef struct
65 {
66     int active;
67 }
68 rmdup_t;
69 
70 typedef struct
71 {
72     int active, rid, end;
73 }
74 overlap_t;
75 
76 struct _vcfbuf_t
77 {
78     int win;
79     bcf_hdr_t *hdr;
80     vcfrec_t *vcf;
81     rbuf_t rbuf;
82     ld_t ld;
83     prune_t prune;
84     overlap_t overlap;
85     rmdup_t rmdup;
86 };
87 
vcfbuf_init(bcf_hdr_t * hdr,int win)88 vcfbuf_t *vcfbuf_init(bcf_hdr_t *hdr, int win)
89 {
90     vcfbuf_t *buf = (vcfbuf_t*) calloc(1,sizeof(vcfbuf_t));
91     buf->hdr = hdr;
92     buf->win = win;
93     buf->overlap.rid = -1;
94     int i;
95     for (i=0; i<VCFBUF_LD_N; i++) buf->ld.max[i] = HUGE_VAL;
96     rbuf_init(&buf->rbuf, 0);
97     return buf;
98 }
99 
vcfbuf_destroy(vcfbuf_t * buf)100 void vcfbuf_destroy(vcfbuf_t *buf)
101 {
102     int i;
103     for (i=0; i<buf->rbuf.m; i++)
104         if ( buf->vcf[i].rec ) bcf_destroy(buf->vcf[i].rec);
105     free(buf->vcf);
106     free(buf->prune.farr);
107     free(buf->prune.vrec);
108     free(buf->prune.ac);
109     free(buf->prune.idx);
110     free(buf);
111 }
112 
vcfbuf_set(vcfbuf_t * buf,vcfbuf_opt_t key,void * value)113 void vcfbuf_set(vcfbuf_t *buf, vcfbuf_opt_t key, void *value)
114 {
115     if ( key==LD_FILTER1 ) { buf->ld.filter1 = *((int*)value); return; }
116     if ( key==LD_RAND_MISSING ) { buf->ld.rand_missing = *((int*)value); return; }
117     if ( key==LD_MAX_R2 ) { buf->ld.max[VCFBUF_LD_IDX_R2] = *((double*)value); return; }
118     if ( key==LD_MAX_LD ) { buf->ld.max[VCFBUF_LD_IDX_LD] = *((double*)value); return; }
119     if ( key==LD_MAX_HD ) { buf->ld.max[VCFBUF_LD_IDX_HD] = *((double*)value); return; }
120 
121     if ( key==VCFBUF_NSITES )
122     {
123         buf->prune.max_sites = *((int*)value);
124         if ( !buf->prune.mode ) buf->prune.mode = PRUNE_MODE_MAX_AF;
125         return;
126     }
127     if ( key==VCFBUF_AF_TAG ) { buf->prune.af_tag = *((char**)value); return; }
128     if ( key==VCFBUF_OVERLAP_WIN ) { buf->overlap.active = *((int*)value); return; }
129     if ( key==VCFBUF_RMDUP) { buf->rmdup.active = *((int*)value); return; }
130 
131     if ( key==VCFBUF_NSITES_MODE )
132     {
133         char *mode = *((char**)value);
134         if ( !strcasecmp(mode,"maxAF") ) buf->prune.mode = PRUNE_MODE_MAX_AF;
135         else if ( !strcasecmp(mode,"1st") ) buf->prune.mode = PRUNE_MODE_1ST;
136         else if ( !strcasecmp(mode,"rand") ) buf->prune.mode = PRUNE_MODE_RAND;
137         else error("The mode \"%s\" is not recognised\n",mode);
138     }
139 }
140 
vcfbuf_nsites(vcfbuf_t * buf)141 int vcfbuf_nsites(vcfbuf_t *buf)
142 {
143     return buf->rbuf.n;
144 }
145 
vcfbuf_push(vcfbuf_t * buf,bcf1_t * rec)146 bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec)
147 {
148     rbuf_expand0(&buf->rbuf, vcfrec_t, buf->rbuf.n+1, buf->vcf);
149 
150     int i = rbuf_append(&buf->rbuf);
151     if ( !buf->vcf[i].rec ) buf->vcf[i].rec = bcf_init1();
152 
153     bcf1_t *ret = buf->vcf[i].rec;
154     buf->vcf[i].rec = rec;
155     buf->vcf[i].af_set = 0;
156     buf->vcf[i].filter = buf->ld.filter1;
157     buf->ld.filter1 = 0;
158 
159     return ret;
160 }
161 
vcfbuf_peek(vcfbuf_t * buf,int idx)162 bcf1_t *vcfbuf_peek(vcfbuf_t *buf, int idx)
163 {
164     int i = rbuf_kth(&buf->rbuf, idx);
165     return i<0 ? NULL : buf->vcf[i].rec;
166 }
167 
vcfbuf_remove(vcfbuf_t * buf,int idx)168 bcf1_t *vcfbuf_remove(vcfbuf_t *buf, int idx)
169 {
170     int i = rbuf_kth(&buf->rbuf, idx);
171     if ( i<0 ) return NULL;
172     bcf1_t *rec = buf->vcf[i].rec;
173 	rbuf_remove_kth(&buf->rbuf, vcfrec_t, idx, buf->vcf);
174     return rec;
175 }
176 
cmpvrec(const void * _a,const void * _b)177 static int cmpvrec(const void *_a, const void *_b)
178 {
179     vcfrec_t *a = *((vcfrec_t**) _a);
180     vcfrec_t *b = *((vcfrec_t**) _b);
181     if ( a->af < b->af ) return -1;
182     if ( a->af == b->af ) return 0;
183     return 1;
184 }
cmpint_desc(const void * _a,const void * _b)185 static int cmpint_desc(const void *_a, const void *_b)
186 {
187     int a = *((int*)_a);
188     int b = *((int*)_b);
189     if ( a < b ) return 1;
190     if ( a == b ) return 0;
191     return -1;
192 }
193 
_prune_sites(vcfbuf_t * buf,int flush_all)194 static void _prune_sites(vcfbuf_t *buf, int flush_all)
195 {
196     int nbuf = flush_all ? buf->rbuf.n : buf->rbuf.n - 1;
197 
198     int nprune = nbuf - buf->prune.max_sites;
199     int i,k,irec = 0;
200     if ( buf->prune.mode==PRUNE_MODE_1ST )
201     {
202         int eoff = flush_all ? 1 : 2;
203         for (i=0; i<nprune; i++)
204             rbuf_remove_kth(&buf->rbuf, vcfrec_t, buf->rbuf.n - eoff, buf->vcf);
205         return;
206     }
207     if ( buf->prune.mode==PRUNE_MODE_RAND )
208     {
209         int eoff = flush_all ? 0 : 1;
210         for (i=0; i<nprune; i++)
211         {
212             int j = (buf->rbuf.n - eoff) * hts_drand48();
213             rbuf_remove_kth(&buf->rbuf, vcfrec_t, j, buf->vcf);
214         }
215         return;
216     }
217 
218     if ( nbuf > buf->prune.mvrec )
219     {
220         buf->prune.idx   = (int*) realloc(buf->prune.idx, nbuf*sizeof(int));
221         buf->prune.vrec  = (vcfrec_t**) realloc(buf->prune.vrec, nbuf*sizeof(vcfrec_t*));
222         buf->prune.mvrec = nbuf;
223     }
224 
225     // set allele frequency and prepare buffer for sorting
226     for (i=-1; rbuf_next(&buf->rbuf,&i) && irec<nbuf; )
227     {
228         bcf1_t *line = buf->vcf[i].rec;
229         if ( line->n_allele > buf->prune.mac )
230         {
231             buf->prune.ac = (int*) realloc(buf->prune.ac, line->n_allele*sizeof(*buf->prune.ac));
232             buf->prune.mac = line->n_allele;
233         }
234         if ( !buf->vcf[i].af_set )
235         {
236             buf->vcf[i].af = 0;
237             if ( buf->prune.af_tag )
238             {
239                 if ( bcf_get_info_float(buf->hdr,line,buf->prune.af_tag,&buf->prune.farr, &buf->prune.mfarr) > 0 ) buf->vcf[i].af = buf->prune.farr[0];
240             }
241             else if ( bcf_calc_ac(buf->hdr, line, buf->prune.ac, BCF_UN_INFO|BCF_UN_FMT) )
242             {
243                 int ntot = buf->prune.ac[0], nalt = 0;
244                 for (k=1; k<line->n_allele; k++) nalt += buf->prune.ac[k];
245                 buf->vcf[i].af = ntot ? (float)nalt/ntot : 0;
246             }
247             buf->vcf[i].af_set = 1;
248         }
249         buf->vcf[i].idx = irec;
250         buf->prune.vrec[irec++] = &buf->vcf[i];
251     }
252 
253     // sort by allele frequency, low AF will be removed preferentially
254     qsort(buf->prune.vrec, nbuf, sizeof(*buf->prune.vrec), cmpvrec);
255 
256     // sort the rbuf indexes to be pruned descendently so that j-th rbuf index
257     // is removed before i-th index if i<j
258     for (i=0; i<nprune; i++)
259         buf->prune.idx[i] = buf->prune.vrec[i]->idx;
260 
261     qsort(buf->prune.idx, nprune, sizeof(int), cmpint_desc);
262 
263     for (i=0; i<nprune; i++)
264         rbuf_remove_kth(&buf->rbuf, vcfrec_t, buf->prune.idx[i], buf->vcf);
265 }
266 
_rmdup_can_flush(vcfbuf_t * buf,int flush_all)267 static int _rmdup_can_flush(vcfbuf_t *buf, int flush_all)
268 {
269     if ( flush_all ) return 1;
270 
271     if ( buf->rbuf.n==1 ) return 0;
272 
273     int k1 = rbuf_kth(&buf->rbuf, -1);
274     int k2 = rbuf_kth(&buf->rbuf, -2);
275 
276     vcfrec_t *rec1 = &buf->vcf[k1];
277     vcfrec_t *rec2 = &buf->vcf[k2];
278 
279     if ( rec1->rec->rid!=rec2->rec->rid ) return 1;
280     if ( rec1->rec->pos!=rec2->rec->pos ) return 1;
281 
282     return 0;
283 }
284 
_overlap_can_flush(vcfbuf_t * buf,int flush_all)285 static int _overlap_can_flush(vcfbuf_t *buf, int flush_all)
286 {
287     if ( flush_all ) { buf->overlap.rid = -1; return 1; }
288 
289     int i = rbuf_last(&buf->rbuf);
290     vcfrec_t *last = &buf->vcf[i];
291     if ( buf->overlap.rid != last->rec->rid ) buf->overlap.end = 0;
292 
293     int beg_pos = last->rec->pos;
294     int end_pos = last->rec->pos + last->rec->rlen - 1;
295 
296     // Assuming left-aligned indels. In case it is a deletion, the real variant
297     // starts one base after. If an insertion, the overlap with previous zero length.
298     int imin = last->rec->rlen;
299     for (i=0; i<last->rec->n_allele; i++)
300     {
301         char *ref = last->rec->d.allele[0];
302         char *alt = last->rec->d.allele[i];
303         if ( *alt == '<' ) continue;    // ignore symbolic alleles
304         while ( *ref && *alt && nt_to_upper(*ref)==nt_to_upper(*alt) ) { ref++; alt++; }
305         if ( imin > ref - last->rec->d.allele[0] ) imin = ref - last->rec->d.allele[0];
306     }
307 
308     if ( beg_pos <= buf->overlap.end )
309     {
310         beg_pos += imin;
311         if ( beg_pos > end_pos ) end_pos = beg_pos;
312     }
313 
314     if ( buf->rbuf.n==1 )
315     {
316         buf->overlap.rid = last->rec->rid;
317         buf->overlap.end = end_pos;
318         return 0;
319     }
320     if ( beg_pos <= buf->overlap.end )
321     {
322         if ( buf->overlap.end < end_pos ) buf->overlap.end = end_pos;
323         return 0;
324     }
325     return 1;
326 }
327 
vcfbuf_flush(vcfbuf_t * buf,int flush_all)328 bcf1_t *vcfbuf_flush(vcfbuf_t *buf, int flush_all)
329 {
330     int i,j;
331 
332     if ( buf->rbuf.n==0 ) return NULL;
333     if ( flush_all ) goto ret;
334 
335     i = rbuf_kth(&buf->rbuf, 0);    // first
336     j = rbuf_last(&buf->rbuf);      // last
337 
338     if ( buf->vcf[i].rec->rid != buf->vcf[j].rec->rid ) goto ret;
339     if ( buf->overlap.active && _overlap_can_flush(buf, flush_all) ) goto ret;
340     if ( buf->rmdup.active && _rmdup_can_flush(buf, flush_all) ) goto ret;
341 
342     if ( buf->win > 0 )
343     {
344         if ( buf->rbuf.n <= buf->win ) return NULL;
345         goto ret;
346     }
347     else if ( buf->win < 0 )
348     {
349         if ( buf->vcf[i].rec->pos - buf->vcf[j].rec->pos > buf->win ) return NULL;
350     }
351     else return NULL;
352 
353 ret:
354     if ( buf->prune.max_sites && buf->prune.max_sites < buf->rbuf.n ) _prune_sites(buf, flush_all);
355 
356     i = rbuf_shift(&buf->rbuf);
357     return buf->vcf[i].rec;
358 }
359 
_estimate_af(int8_t * ptr,int size,int nvals,int nsamples)360 static double _estimate_af(int8_t *ptr, int size, int nvals, int nsamples)
361 {
362     int i,j, nref = 0, nalt = 0;
363     for (i=0; i<nsamples; i++)
364     {
365         for (j=0; j<nvals; j++)
366         {
367             if ( ptr[j]==bcf_gt_missing ) break;
368             if ( ptr[j]==bcf_int8_vector_end ) break;
369             if ( bcf_gt_allele(ptr[j]) ) nalt++;
370             else nref++;
371         }
372         ptr += size;
373     }
374     if ( nref+nalt == 0 ) return 0;
375     return (double)nalt/(nref+nalt);
376 }
377 
378 /*
379     The `ld` is set to D approximated as suggested in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710162/
380         D =~ (GT correlation) * sqrt(Pa*(1-Pa)*Pb*(1-Pb))
381 
382     and `hd` as proposed in Ragsdale, A. P., & Gravel, S. (2019). Unbiased estimation of linkage
383     disequilibrium from unphased data.  Molecular Biology and Evolution. doi:10.1093/molbev/msz265
384 
385         \hat{D} = 1/[n*(n+1)]*[
386                              (n1 + n2/2 + n4/2 + n5/4)*(n5/4 + n6/2 + n8/2 + n9)
387                             -(n2/2 + n3 + n5/4 + n6/2)*(n4/2 + n5/4 + n7 + n8/2)
388                         ]
389     where n1,n2,..n9 are counts of RR/RR,RR/RA,..,AA/AA genotypes.
390 
391     Returns 0 on success, -1 if the values could not be determined (missing genotypes)
392 */
_calc_r2_ld(vcfbuf_t * buf,bcf1_t * arec,bcf1_t * brec,vcfbuf_ld_t * ld)393 static int _calc_r2_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec, vcfbuf_ld_t *ld)
394 {
395     if ( arec->n_sample!=brec->n_sample ) error("Different number of samples: %d vs %d\n",arec->n_sample,brec->n_sample);
396     assert( arec->n_sample );
397 
398     int i,j,igt = bcf_hdr_id2int(buf->hdr, BCF_DT_ID, "GT");
399     bcf_unpack(arec, BCF_UN_FMT);
400     bcf_unpack(brec, BCF_UN_FMT);
401     bcf_fmt_t *afmt = NULL, *bfmt = NULL;
402     for (i=0; i<arec->n_fmt; i++)
403         if ( arec->d.fmt[i].id==igt ) { afmt = &arec->d.fmt[i]; break; }
404     if ( !afmt ) return -1;  // no GT tag
405     for (i=0; i<brec->n_fmt; i++)
406         if ( brec->d.fmt[i].id==igt ) { bfmt = &brec->d.fmt[i]; break; }
407     if ( !bfmt ) return -1;  // no GT tag
408 
409     if ( afmt->n==0 ) return -1;   // empty?!
410     if ( bfmt->n==0 ) return -1;   // empty?!
411     if ( afmt->type!=BCF_BT_INT8 ) error("TODO: the GT fmt_type is not int8!\n");
412     if ( bfmt->type!=BCF_BT_INT8 ) error("TODO: the GT fmt_type is not int8!\n");
413 
414     // Determine allele frequencies, this is to sample randomly missing genotypes
415     double aaf = 0, baf = 0;
416     if ( buf->ld.rand_missing )
417     {
418         aaf = _estimate_af((int8_t*)afmt->p, afmt->size, afmt->n, arec->n_sample);
419         baf = _estimate_af((int8_t*)bfmt->p, bfmt->size, bfmt->n, brec->n_sample);
420     }
421 
422     // Calculate r2, lf, hd
423     double nhd[] = {0,0,0,0,0,0,0,0,0};
424     double ab = 0, aa = 0, bb = 0, a = 0, b = 0;
425     int nab = 0, ndiff = 0;
426     int an_tot = 0, bn_tot = 0;
427     for (i=0; i<arec->n_sample; i++)
428     {
429         int8_t *aptr = (int8_t*) (afmt->p + i*afmt->size);
430         int8_t *bptr = (int8_t*) (bfmt->p + i*bfmt->size);
431         int adsg = 0, bdsg = 0;     // dosages (0,1,2) at sites (a,b)
432         int an = 0, bn = 0;         // number of alleles at sites (a,b)
433         for (j=0; j<afmt->n; j++)
434         {
435             if ( aptr[j]==bcf_int8_vector_end ) break;
436             if ( aptr[j]==bcf_gt_missing )
437             {
438                 if ( !buf->ld.rand_missing ) break;
439                 if ( hts_drand48() >= aaf ) adsg += 1;
440             }
441             else if ( bcf_gt_allele(aptr[j]) ) adsg += 1;
442             an++;
443         }
444         for (j=0; j<bfmt->n; j++)
445         {
446             if ( bptr[j]==bcf_int8_vector_end ) break;
447             if ( bptr[j]==bcf_gt_missing )
448             {
449                 if ( !buf->ld.rand_missing ) break;
450                 if ( hts_drand48() >= baf ) bdsg += 1;
451             }
452             else if ( bcf_gt_allele(bptr[j]) ) bdsg += 1;
453             bn++;
454         }
455         if ( an && bn )
456         {
457             an_tot += an;
458             aa += adsg*adsg;
459             a  += adsg;
460 
461             bn_tot += bn;
462             bb += bdsg*bdsg;
463             b  += bdsg;
464 
465             if ( adsg!=bdsg ) ndiff++;
466             ab += adsg*bdsg;
467             nab++;
468         }
469         if ( an==2 && bn==2 )   // for now only diploid genotypes
470         {
471             assert( adsg<=2 && bdsg<=2 );
472             nhd[ bdsg*3 + adsg ]++;
473         }
474     }
475     if ( !nab ) return -1;  // no data in common for the two sites
476 
477     double pa = a/an_tot;
478     double pb = b/bn_tot;
479     double cor;
480     if ( !ndiff ) cor = 1;
481     else
482     {
483         if ( aa == a*a/nab || bb == b*b/nab )     // zero variance, add small noise
484         {
485             aa += 1e-4;
486             bb += 1e-4;
487             ab += 1e-4;
488             a  += 1e-2;
489             b  += 1e-2;
490             nab++;
491         }
492         cor = (ab - a*b/nab) / sqrt(aa - a*a/nab) / sqrt(bb - b*b/nab);
493     }
494 
495     ld->val[VCFBUF_LD_IDX_R2] = cor * cor;
496 
497     // Lewontin's normalization of D. Also we cap at 1 as the calculation
498     // can result in values bigger than 1 for high AFs.
499     ld->val[VCFBUF_LD_IDX_LD] = cor * sqrt(pa*(1-pa)*pb*(1-pb));
500     double norm;
501     if ( ld->val[VCFBUF_LD_IDX_LD] < 0 )
502         norm = -pa*pb > -(1-pa)*(1-pb) ? -pa*pb : -(1-pa)*(1-pb);
503     else
504         norm = pa*(1-pb) > (1-pa)*pb ? pa*(1-pb) : (1-pa)*pb;
505     if ( norm )
506         ld->val[VCFBUF_LD_IDX_LD] = fabs(norm) > fabs(ld->val[VCFBUF_LD_IDX_LD]) ? ld->val[VCFBUF_LD_IDX_LD]/norm : 1;
507     if ( !ld->val[VCFBUF_LD_IDX_LD] )
508         ld->val[VCFBUF_LD_IDX_LD] = fabs(ld->val[VCFBUF_LD_IDX_LD]);    // avoid "-0" on output
509 
510     ld->val[VCFBUF_LD_IDX_HD] =
511         (nhd[0] + nhd[1]/2. + nhd[3]/2. + nhd[4]/4.)*(nhd[4]/4. + nhd[5]/2. + nhd[7]/2. + nhd[8])
512         - (nhd[1]/2. + nhd[2] + nhd[4]/4. + nhd[5]/2.)*(nhd[3]/2. + nhd[4]/4. + nhd[6] + nhd[7]/2.);
513     ld->val[VCFBUF_LD_IDX_HD] /= nab;
514     ld->val[VCFBUF_LD_IDX_HD] /= nab+1;
515 
516     return 0;
517 }
518 
vcfbuf_ld(vcfbuf_t * buf,bcf1_t * rec,vcfbuf_ld_t * ld)519 int vcfbuf_ld(vcfbuf_t *buf, bcf1_t *rec, vcfbuf_ld_t *ld)
520 {
521     int ret = -1;
522     if ( !buf->rbuf.n ) return ret;
523 
524     int j, i = buf->rbuf.f;
525 
526     // Relying on vcfbuf being properly flushed - all sites in the buffer
527     // must come from the same chromosome
528     if ( buf->vcf[i].rec->rid != rec->rid ) return ret;
529 
530     vcfbuf_ld_t tmp;
531     for (j=0; j<VCFBUF_LD_N; j++)
532     {
533         ld->val[j] = -HUGE_VAL;
534         ld->rec[j] = NULL;
535     }
536 
537     for (i=-1; rbuf_next(&buf->rbuf,&i); )
538     {
539         if ( buf->vcf[i].filter ) continue;
540         if ( _calc_r2_ld(buf, buf->vcf[i].rec, rec, &tmp) < 0 ) continue;   // missing genotypes
541 
542         int done = 0;
543         for (j=0; j<VCFBUF_LD_N; j++)
544         {
545             if ( ld->val[j] < tmp.val[j] )
546             {
547                 ld->val[j] = tmp.val[j];
548                 ld->rec[j] = buf->vcf[i].rec;
549             }
550             if ( buf->ld.max[j] < tmp.val[j] ) done = 1;
551             ret = 0;
552         }
553         if ( done ) return ret;
554     }
555     return ret;
556 }
557 
558 
559