1LoadFunctionLibrary("../IOFunctions.bf");
2LoadFunctionLibrary("../all-terms.bf");
3LoadFunctionLibrary("../models/frequencies.bf");
4
5
6/**
7 * Compute  all pairwise distances between sequences in a data set
8 * @name    distances.nucleotide.tn93
9 * @param   filter {String} - id of dataset
10 * @param   freqs {Matrix/null} - if not null, use these nucleotide frequencies
11 * @param   options{Dict/null} - options for ambiguity treatment
12 * @returns {Matrix} r - pairwise TN93 distances
13 */
14lfunction distances.nucleotide.tn93 (filter, freqs, options) {
15    if (null == freqs) {
16        freqs = (frequencies._aux.empirical.singlechar ({}, null, filter))[^"terms.efv_estimate"];
17    }
18
19    fY = freqs[1] + freqs[3];
20    fR = 1 - fY;
21
22	if (Min (freqs, 0) == 0) {
23		K2P = TRUE;
24	}
25	else {
26	    K2P = FALSE;
27		K1 = 2*freqs[0]*freqs[2]/fR;
28		K2 = 2*freqs[1]*freqs[3]/fY;
29		K3 = 2*(fR*fY-freqs[0]*freqs[2]*fY/fR-freqs[1]*freqs[3]*fR/fY);
30	}
31
32    sequence_count = ^(filter + ".species");
33    distances      = {sequence_count,sequence_count};
34    resolution     = "RESOLVE_AMBIGUITIES";
35    if (utility.Has (options, "ambigs","String")) {
36        resolution = options["ambigs"];
37    }
38
39    for (s1 = 0; s1 < sequence_count; s1 += 1) {
40        for (s2 = s1 + 1; s2 < sequence_count; s2 += 1) {
41             ExecuteCommands ("GetDataInfo (count, ^filter, s1, s2, `resolution`)");
42             totalSitesCompared = +count;
43             d = 1000.;
44             if (totalSitesCompared > 0) {
45                 count = count * (1/totalSitesCompared);
46                 AG                     = count[0][2] + count[2][0];
47                 CT                     = count[1][3] + count[3][1];
48                 transversions          = 1 - AG - CT - count[0][0] - count[1][1] - count[2][2] - count[3][3];
49                 if (K2P) {
50                    d1 = 1-2*(AG+CT)-transversions;
51                    d2 = 1-2*transversions;
52
53                    if (d1 >0 && d2>0) {
54                        d = -(0.5*Log(d1)+.25*Log(d2));
55                    }
56
57                 } else {
58                     d1 = 1-AG/K1-0.5*transversions/fR;
59                     d2 = 1-CT/K2-0.5*transversions/fY;
60                     d3 = 1-0.5*transversions/fR/fY;
61                     if (d1>0 && d2 >0 && d3 >0) {
62                        d = -K1*Log(d1)-K2*Log(d2)-K3*Log(d3);
63                     }
64                }
65            }
66            distances [s1][s2] = d;
67            distances [s2][s1] = d;
68        }
69    }
70
71
72    return distances;
73}
74
75/**
76 * Compute  all pairwise percent distances between sequences in a data set
77 * @name    distances.nucleotide.p_distance
78 * @param   filter  {String} - id of dataset
79 * @param   options {Dict/null} - options for ambiguity treatment
80 * @returns {Matrix} r - pairwise TN93 distances
81 */
82lfunction distances.p_distance (filter, options) {
83    sequence_count = ^(filter + ".species");
84    distances      = {sequence_count,sequence_count};
85    resolution     = "RESOLVE_AMBIGUITIES";
86    if (utility.Has (options, "ambigs","String")) {
87        resolution = options["ambigs"];
88    }
89
90    if (sequence_count) {
91        ExecuteCommands ("GetDataInfo (count, ^filter, 0, 0, `resolution`)");
92        off_diag = count["_MATRIX_ELEMENT_COLUMN_==_MATRIX_ELEMENT_ROW_"];
93    }
94
95    expr := "GetDataInfo (count, ^filter, s1, s2, `resolution`)";
96
97    for (s1 = 0; s1 < sequence_count; s1 += 1) {
98        for (s2 = s1 + 1; s2 < sequence_count; s2 += 1) {
99             ExecuteCommands (expr);
100             totalSitesCompared = +count;
101             if (totalSitesCompared != 0) {
102                identicalSites = +count[off_diag];
103                d = (totalSitesCompared-identicalSites) / totalSitesCompared;
104             } else {
105                d = 1000;
106             }
107             distances [s1][s2] = d;
108             distances [s2][s1] = d;
109        }
110    }
111
112    return distances;
113}
114
115