1
2%{
3#include "genotype.h"
4#include "probability.h"
5
6
7#define LocusGenotypeCountSetLISTLENGTH 128
8#define LocusGenotypeCountLISTLENGTH 128
9
10
11%}
12
13
14struct PopulationGenotypeCount
15int count[BiGenotypeLength]
16int defined_total
17Population * pop !link
18
19struct LocusGenotypeCount
20BiLocus * bl !link
21PopulationGenotypeCount ** pgc !list
22
23struct LocusGenotypeCountSet
24LocusGenotypeCount ** lgc !list
25
26
27
28%{
29#include "frequency_count.h"
30
31
32double chisquared_stat_LocusGenotypeCount(LocusGenotypeCount * lgc)
33{
34  double ret = 0.0;
35  int i;
36
37  for(i=0;i<lgc->len;i++) {
38    ret += chisquared_stat_PopulationGenotypeCount(lgc->pgc[i]);
39  }
40
41  return ret;
42}
43
44
45double chisquared_stat_PopulationGenotypeCount(PopulationGenotypeCount * pgc)
46{
47  double first;
48  double second;
49  double ret = 0.0;
50  double diff;
51  double expec;
52
53  assert(pgc->defined_total > 5);
54
55  first = central_first_frequency_PopulationGenotypeCount(pgc);
56
57  if( first < 0.0000000000000001 || first > 0.999999999999999 ) {
58    warn("Problem in chisquared statistic with central first freq at %f, returning 0.0",first);
59    return 0.0;
60  }
61
62
63  second = 1.0 - first;
64
65
66  expec =  (first*first * (double)pgc->defined_total);
67
68  /*  fprintf(stderr,"first is %.2f, totalis %d, expect is %f\n",first,pgc->defined_total,expec);*/
69
70  assert(expec > 0.0);
71
72  diff = pgc->count[BiGenotypeHomozygousFirst] - expec;
73
74  ret += (diff*diff)/expec;
75
76  expec =  (first*second*2 * (double)pgc->defined_total);
77  assert(expec > 0.0);
78
79  diff = pgc->count[BiGenotypeHetrozygous] - expec;
80
81  ret += (diff*diff)/expec;
82
83
84  expec =  (second*second * (double)pgc->defined_total);
85  assert(expec > 0.0);
86
87  diff = pgc->count[BiGenotypeHetrozygous] - expec;
88
89  ret += (diff*diff)/expec;
90
91  return ret;
92
93}
94
95boolean seen_each_genotype_in_all_populations_LocusGenotypeCount(LocusGenotypeCount * lgc)
96{
97  int i;
98
99
100  for(i=0;i<lgc->len;i++) {
101    if( lgc->pgc[i]->count[BiGenotypeHomozygousFirst] == 0 ||
102	lgc->pgc[i]->count[BiGenotypeHomozygousSecond] == 0 ||
103	lgc->pgc[i]->count[BiGenotypeHetrozygous] == 0 ) {
104      return FALSE;
105    }
106  }
107
108  return TRUE;
109}
110
111
112double central_first_frequency_PopulationGenotypeCount(PopulationGenotypeCount * pgc)
113{
114  assert(pgc != NULL);
115  assert(pgc->defined_total > 0);
116
117  return ((double)(pgc->count[BiGenotypeHomozygousFirst]*2+pgc->count[BiGenotypeHetrozygous]))/
118    (2*(double)(pgc->defined_total));
119}
120
121double smallest_minor_allele_LocusGenotypeCount(LocusGenotypeCount * lgc)
122{
123  int i;
124  double smallest = 0.5;
125  double central;
126
127
128  for(i=0;i<lgc->len;i++) {
129    central = central_first_frequency_PopulationGenotypeCount(lgc->pgc[i]);
130    if( central > 0.5 ) {
131      if( smallest > (1.0 - central)) {
132	smallest = (1.0 - central);
133      }
134    } else {
135      if( smallest > central ) {
136	smallest = central;
137      }
138    }
139  }
140
141  return smallest;
142}
143
144LocusGenotypeCount * resampled_LocusGenotypeCount(LocusGenotypeCount * lgc)
145{
146  int i,j;
147  LocusGenotypeCount * out;
148  PopulationGenotypeCount * pgc;
149  double rnd1;
150  double rnd2;
151  double central;
152
153  assert(lgc != NULL);
154
155  out = LocusGenotypeCount_alloc_len(lgc->len);
156  out->bl = lgc->bl;
157
158  for(i=0;i<lgc->len;i++) {
159
160
161    pgc  = PopulationGenotypeCount_alloc();
162
163    pgc->count[BiGenotypeHomozygousFirst] = 0;
164    pgc->count[BiGenotypeHetrozygous] = 0;
165    pgc->count[BiGenotypeHomozygousSecond] = 0;
166
167    central = central_first_frequency_PopulationGenotypeCount(lgc->pgc[i]);
168    for(j=0;j<lgc->pgc[i]->defined_total;j++) {
169      rnd1 = random_0_to_1();
170      rnd2 = random_0_to_1();
171      if( rnd1 < central && rnd2 < central ) {
172	pgc->count[BiGenotypeHomozygousFirst]++;
173      } else if ( rnd1 > central && rnd2 > central ) {
174	pgc->count[BiGenotypeHomozygousSecond]++;
175      } else {
176	pgc->count[BiGenotypeHetrozygous]++;
177      }
178    }
179    pgc->defined_total = lgc->pgc[i]->defined_total;
180    add_LocusGenotypeCount(out,pgc);
181  }
182
183  return out;
184}
185
186LocusGenotypeCountSet * LocusGenotypeCountSet_from_BiGenotypeSet(BiGenotypeSet * bgs)
187{
188  int i;
189  int j;
190  int p;
191  LocusGenotypeCount * lgc;
192  PopulationGenotypeCount* pgc;
193  LocusGenotypeCountSet * out;
194
195  out = LocusGenotypeCountSet_alloc_len(bgs->len);
196
197  for(i=0;i<bgs->len;i++) {
198    lgc = LocusGenotypeCount_alloc_len(bgs->ps->len);
199    lgc->bl = bgs->locus[i]->locus;
200
201
202    for(p=0;p<bgs->ps->len;p++) {
203
204      pgc = PopulationGenotypeCount_alloc();
205      pgc->pop = bgs->ps->pop[p];
206      add_LocusGenotypeCount(lgc,pgc);
207
208      pgc->count[BiGenotypeHomozygousFirst] = 0;
209      pgc->count[BiGenotypeHomozygousSecond] = 0;
210      pgc->count[BiGenotypeHetrozygous] = 0;
211      pgc->count[BiGenotypeUnknown] = 0;
212
213      pgc->defined_total = 0;
214
215      for(j=0;j<bgs->locus[i]->len;j++) {
216
217	if( bgs->locus[i]->big[j]->person->pop != pgc->pop ) {
218	  continue;
219	}
220
221	pgc->count[bgs->locus[i]->big[j]->type]++;
222	if( bgs->locus[i]->big[j]->type != BiGenotypeUnknown ) {
223	  pgc->defined_total++;
224	}
225
226      }
227
228
229    }
230    add_LocusGenotypeCountSet(out,lgc);
231
232  }
233
234  return out;
235}
236
237
238
239%}
240