1 2%{ 3#include "genotype.h" 4#include "probability.h" 5 6 7#define LocusGenotypeCountSetLISTLENGTH 128 8#define LocusGenotypeCountLISTLENGTH 128 9 10 11%} 12 13 14struct PopulationGenotypeCount 15int count[BiGenotypeLength] 16int defined_total 17Population * pop !link 18 19struct LocusGenotypeCount 20BiLocus * bl !link 21PopulationGenotypeCount ** pgc !list 22 23struct LocusGenotypeCountSet 24LocusGenotypeCount ** lgc !list 25 26 27 28%{ 29#include "frequency_count.h" 30 31 32double chisquared_stat_LocusGenotypeCount(LocusGenotypeCount * lgc) 33{ 34 double ret = 0.0; 35 int i; 36 37 for(i=0;i<lgc->len;i++) { 38 ret += chisquared_stat_PopulationGenotypeCount(lgc->pgc[i]); 39 } 40 41 return ret; 42} 43 44 45double chisquared_stat_PopulationGenotypeCount(PopulationGenotypeCount * pgc) 46{ 47 double first; 48 double second; 49 double ret = 0.0; 50 double diff; 51 double expec; 52 53 assert(pgc->defined_total > 5); 54 55 first = central_first_frequency_PopulationGenotypeCount(pgc); 56 57 if( first < 0.0000000000000001 || first > 0.999999999999999 ) { 58 warn("Problem in chisquared statistic with central first freq at %f, returning 0.0",first); 59 return 0.0; 60 } 61 62 63 second = 1.0 - first; 64 65 66 expec = (first*first * (double)pgc->defined_total); 67 68 /* fprintf(stderr,"first is %.2f, totalis %d, expect is %f\n",first,pgc->defined_total,expec);*/ 69 70 assert(expec > 0.0); 71 72 diff = pgc->count[BiGenotypeHomozygousFirst] - expec; 73 74 ret += (diff*diff)/expec; 75 76 expec = (first*second*2 * (double)pgc->defined_total); 77 assert(expec > 0.0); 78 79 diff = pgc->count[BiGenotypeHetrozygous] - expec; 80 81 ret += (diff*diff)/expec; 82 83 84 expec = (second*second * (double)pgc->defined_total); 85 assert(expec > 0.0); 86 87 diff = pgc->count[BiGenotypeHetrozygous] - expec; 88 89 ret += (diff*diff)/expec; 90 91 return ret; 92 93} 94 95boolean seen_each_genotype_in_all_populations_LocusGenotypeCount(LocusGenotypeCount * lgc) 96{ 97 int i; 98 99 100 for(i=0;i<lgc->len;i++) { 101 if( lgc->pgc[i]->count[BiGenotypeHomozygousFirst] == 0 || 102 lgc->pgc[i]->count[BiGenotypeHomozygousSecond] == 0 || 103 lgc->pgc[i]->count[BiGenotypeHetrozygous] == 0 ) { 104 return FALSE; 105 } 106 } 107 108 return TRUE; 109} 110 111 112double central_first_frequency_PopulationGenotypeCount(PopulationGenotypeCount * pgc) 113{ 114 assert(pgc != NULL); 115 assert(pgc->defined_total > 0); 116 117 return ((double)(pgc->count[BiGenotypeHomozygousFirst]*2+pgc->count[BiGenotypeHetrozygous]))/ 118 (2*(double)(pgc->defined_total)); 119} 120 121double smallest_minor_allele_LocusGenotypeCount(LocusGenotypeCount * lgc) 122{ 123 int i; 124 double smallest = 0.5; 125 double central; 126 127 128 for(i=0;i<lgc->len;i++) { 129 central = central_first_frequency_PopulationGenotypeCount(lgc->pgc[i]); 130 if( central > 0.5 ) { 131 if( smallest > (1.0 - central)) { 132 smallest = (1.0 - central); 133 } 134 } else { 135 if( smallest > central ) { 136 smallest = central; 137 } 138 } 139 } 140 141 return smallest; 142} 143 144LocusGenotypeCount * resampled_LocusGenotypeCount(LocusGenotypeCount * lgc) 145{ 146 int i,j; 147 LocusGenotypeCount * out; 148 PopulationGenotypeCount * pgc; 149 double rnd1; 150 double rnd2; 151 double central; 152 153 assert(lgc != NULL); 154 155 out = LocusGenotypeCount_alloc_len(lgc->len); 156 out->bl = lgc->bl; 157 158 for(i=0;i<lgc->len;i++) { 159 160 161 pgc = PopulationGenotypeCount_alloc(); 162 163 pgc->count[BiGenotypeHomozygousFirst] = 0; 164 pgc->count[BiGenotypeHetrozygous] = 0; 165 pgc->count[BiGenotypeHomozygousSecond] = 0; 166 167 central = central_first_frequency_PopulationGenotypeCount(lgc->pgc[i]); 168 for(j=0;j<lgc->pgc[i]->defined_total;j++) { 169 rnd1 = random_0_to_1(); 170 rnd2 = random_0_to_1(); 171 if( rnd1 < central && rnd2 < central ) { 172 pgc->count[BiGenotypeHomozygousFirst]++; 173 } else if ( rnd1 > central && rnd2 > central ) { 174 pgc->count[BiGenotypeHomozygousSecond]++; 175 } else { 176 pgc->count[BiGenotypeHetrozygous]++; 177 } 178 } 179 pgc->defined_total = lgc->pgc[i]->defined_total; 180 add_LocusGenotypeCount(out,pgc); 181 } 182 183 return out; 184} 185 186LocusGenotypeCountSet * LocusGenotypeCountSet_from_BiGenotypeSet(BiGenotypeSet * bgs) 187{ 188 int i; 189 int j; 190 int p; 191 LocusGenotypeCount * lgc; 192 PopulationGenotypeCount* pgc; 193 LocusGenotypeCountSet * out; 194 195 out = LocusGenotypeCountSet_alloc_len(bgs->len); 196 197 for(i=0;i<bgs->len;i++) { 198 lgc = LocusGenotypeCount_alloc_len(bgs->ps->len); 199 lgc->bl = bgs->locus[i]->locus; 200 201 202 for(p=0;p<bgs->ps->len;p++) { 203 204 pgc = PopulationGenotypeCount_alloc(); 205 pgc->pop = bgs->ps->pop[p]; 206 add_LocusGenotypeCount(lgc,pgc); 207 208 pgc->count[BiGenotypeHomozygousFirst] = 0; 209 pgc->count[BiGenotypeHomozygousSecond] = 0; 210 pgc->count[BiGenotypeHetrozygous] = 0; 211 pgc->count[BiGenotypeUnknown] = 0; 212 213 pgc->defined_total = 0; 214 215 for(j=0;j<bgs->locus[i]->len;j++) { 216 217 if( bgs->locus[i]->big[j]->person->pop != pgc->pop ) { 218 continue; 219 } 220 221 pgc->count[bgs->locus[i]->big[j]->type]++; 222 if( bgs->locus[i]->big[j]->type != BiGenotypeUnknown ) { 223 pgc->defined_total++; 224 } 225 226 } 227 228 229 } 230 add_LocusGenotypeCountSet(out,lgc); 231 232 } 233 234 return out; 235} 236 237 238 239%} 240