1#!/usr/bin/env python
2
3"""
4For each interval in `bed1` print the fraction of bases covered by `bed2`.
5
6usage: %prog bed1 bed2 [mask]
7"""
8
9import sys
10
11from bx.bitset import BinnedBitSet
12from bx.bitset_builders import binned_bitsets_from_file
13
14bed1_fname, bed2_fname = sys.argv[1:3]
15
16bitsets = binned_bitsets_from_file(open(bed2_fname))
17
18
19def clone(bits):
20    b = BinnedBitSet(bits.size)
21    b.ior(bits)
22    return b
23
24
25if len(sys.argv) > 3:
26    mask_fname = sys.argv[3]
27    mask = binned_bitsets_from_file(open(mask_fname))
28    new_bitsets = dict()
29    for key in bitsets:
30        if key in mask:
31            b = clone(mask[key])
32            b.invert()
33            b.iand(bitsets[key])
34            new_bitsets[key] = b
35    bitsets = new_bitsets
36else:
37    mask = None
38
39for line in open(bed1_fname):
40    fields = line.split()
41    chr, start, end = fields[0], int(fields[1]), int(fields[2])
42    bases_covered = 0
43    if chr in bitsets:
44        bases_covered = bitsets[chr].count_range(start, end-start)
45    length = end - start
46    if mask and chr in mask:
47        bases_masked = mask[chr].count_range(start, end-start)
48        length -= bases_masked
49    assert bases_covered <= length, f"{bases_covered!r}, {bases_masked!r}, {length!r}"
50    if length == 0:
51        print(0.0)
52    else:
53        print(bases_covered / length)
54