1 /******************************************************************************
2
3 #### #### ##### # # ####
4 # # # # # ## ## # #
5 # # # # # ## # #
6 # # # # # # # ### #
7 # # # # # # # ### # #
8 ### # #### # # # ### ####
9
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12 for Biomedical Research. All rights reserved. See READ.ME for license. */
13
14 #define INC_LIB
15 #define INC_MISC
16 #define INC_CALLQCTM
17 #define INC_QLOWLEVEL
18 #include "qtl.h"
19
20 /* Globals available to the outside world: */
21 real like_tolerance, pos_tolerance, mat_tolerance;
22 bool print_iter, print_rec_mat, bag_qctm, brute_force, print_brute_force;
23 bool debug_newton;
24 int max_intervals, max_genotype_vars, max_continuous_vars;
25 /* functions alloc_qctm_globals(), free_qctm_globals(),
26 qctm_globals_avail(), qctm_init(), qtl_conv_to_map(); */
27
28 /* Local to this file: */
29 void qctm();
30 void set_qctm_globals();
31 void fill_in_qtl_map();
32
33 real E_step();
34 void likelihood();
35 void make_rec_probs();
36
37 void ML_qtl_weight();
38 void kill_entry();
39
40 void ML_qtl_pos();
41 real do_brute_force();
42 real pos_like();
43
44 real variance();
45 real normal_density();
46
47 bool debug_qctm;
48 int n_individuals, n_intervals, n_qtl_genotypes;
49 int n_genotype_vars, n_continuous_vars;
50
51 /* WARNING: Make sure that the (wrong) globals from qtop.c,
52 num_continuous_vars and num_intervals, aren't used in this file! */
53
54 /* All of these variables are set & alloced by routines in qdata.c */
55 int max_interx_genotypes, max_backx_genotypes;
56 int **lookup_genotype; /* [bit-vect][interval#]=> A,B,H */
57 real **lookup_coded_genotype; /* [bit-vect][geno-var#]=> real# */
58
59 /* State variables for the QCTM E-M loop */
60 real **expected_genotype; /* [indivs][geno_vars+1] */
61 real *int_like; /* [intervals] */
62 real **S_matrix, **S_inverse; /* [geno_vars+1][geno_vars+1] */
63 real **indiv_S_matrix; /* ditto */
64 real *qctm_qtl_pos; /* [intervals] */
65 real *null_qtl_weight, *qctm_qtl_weight; /* [geno_vars+1] */
66 real *fix_qtl_weight, *temp_row; /* [geno_vars+1] see ML_qtl_weight */
67 GENO_PROBS *indiv_rec_like; /* [interval][int-geno][qtl-geno] */
68 GENO_PROBS *expected_recs, *trans_prob; /* ditto */
69 INTERVAL_GENOTYPE_PROBS *rec_like; /* [interval][int-geno] */
70 bool epistasis_kludge;
71
72 #define SMALL_VARIANCE 1e-5
73 #define TINY_VARIANCE 0.001
74 #define TINY_LIKELIHOOD 1e-40
75 #define SQRT_2_PI 2.506628
76 #define BIGGEST_EXPONENT 1e20
77 #define NOREC 0
78 #define REC 1
79 #define RECS 2
80
81
82
83
84 /****************************** PRELIMINARIES ******************************/
85
qctm_init()86 void qctm_init()
87 {
88 max_interx_genotypes= max_backx_genotypes= 0;
89 max_intervals= max_genotype_vars= max_continuous_vars= 0;
90 pos_tolerance= like_tolerance= mat_tolerance= -1.0;
91 debug_qctm= FALSE;
92
93 lookup_genotype= NULL; lookup_coded_genotype= NULL;
94 int_like= NULL; temp_row= NULL;
95 expected_genotype= NULL;
96 S_matrix= S_inverse= indiv_S_matrix= NULL;
97 expected_recs= indiv_rec_like= trans_prob= NULL;
98 rec_like= NULL;
99 qctm_qtl_weight= qctm_qtl_pos= NULL;
100 null_qtl_weight= NULL;
101 }
102
103
set_qctm_globals(data,map)104 void set_qctm_globals(data,map) /* called only by qctm itself */
105 DATA *data;
106 QTL_MAP *map;
107 {
108 int i, j, n, k, last;
109 int n1, n2, g1, g2;
110 real het_additive_coeff, a, b, c;
111
112 /* debug_qctm=TRUE; */
113
114 n_individuals= data->num_individuals;
115 n_intervals= data->num_intervals;
116 n_genotype_vars= (raw.data_type==INTERCROSS ? 2:1) * n_intervals;
117 if (n_intervals==0) n_qtl_genotypes=0; else
118 n_qtl_genotypes= ipow((raw.data_type==INTERCROSS ? 3:2),n_intervals);
119 n_continuous_vars= data->num_continuous_vars;
120 epistasis_kludge= FALSE;
121
122 /* Paranoia: Check if call to QCTM is OK? */
123 /* The n_inter<n_indiv test is to keep variance() from crashing. */
124
125 if (like_tolerance<=0.0 || pos_tolerance<=0.0 || mat_tolerance<=0.0 ||
126 n_intervals>=n_individuals || !qctm_globals_avail() ||
127 n_intervals>max_intervals || n_continuous_vars>max_continuous_vars)
128 send(CRASH);
129
130 if (raw.data_type==BACKCROSS) {
131 /* send(CRASH); Needs to be modified for epistasis and lookups */
132 for (i=0; i<n_intervals; i++) {
133 qctm_qtl_weight[i]= map->qtl_weight[i];
134 fix_qtl_weight[i]= map->constraint[i].backx_weight;
135 }
136 for (k=0; k<n_continuous_vars; k++) {
137 qctm_qtl_weight[n_intervals+k]= map->cont_var_weight[k];
138 fix_qtl_weight[n_intervals+k]= map->fix_cont_var_weight[k];
139 }
140 fix_qtl_weight[n_intervals+n_continuous_vars]= DONT_FIX;
141
142 } else if (raw.data_type==INTERCROSS) {
143 for (j=0, n=0; j<n_intervals; j++, n+=2) {
144 qctm_qtl_weight[n]= map->qtl_weight[j];
145 qctm_qtl_weight[n+1]= map->qtl_dominance[j];
146
147 a=map->constraint[j].a;
148 b=map->constraint[j].b;
149 c=map->constraint[j].c;
150
151 if (fix_weight_kludge) { /* for scan weight KLUDGE */
152 fix_qtl_weight[n]= map->qtl_weight[j]; fix_qtl_weight[n+1]=0.0;
153 } else if (a==0.0 && b==0.0) { /* nothing fixed */
154 fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= DONT_FIX;
155 } else if (b==0.0) { /* fix additive term */
156 fix_qtl_weight[n]= c/a; fix_qtl_weight[n+1]= DONT_FIX;
157 } else { /* b!=0.0 -> fix dominance term */
158 fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= c/b;
159 }
160
161 het_additive_coeff= (b!=0.0 ? (b-a)/b : 1.0);
162 for (i=0; i<n_qtl_genotypes; i++) {
163 if (lookup_genotype[i][j]==A) {
164 lookup_coded_genotype[i][n]= 0.0;
165 lookup_coded_genotype[i][n+1]= 0.0;
166 } else if (lookup_genotype[i][j]==B) {
167 lookup_coded_genotype[i][n]= 2.0;
168 lookup_coded_genotype[i][n+1]= 0.0;
169 } else if (lookup_genotype[i][j]==H) {
170 lookup_coded_genotype[i][n]= het_additive_coeff;
171 lookup_coded_genotype[i][n+1]= 1.0;
172 }
173 }
174 }
175
176 last= n_genotype_vars; /* actually it's the 1st unused genotype var */
177
178 for (k=0; k<n_continuous_vars; k++) { /* Includes epistasis term */
179 qctm_qtl_weight[last+k]= map->cont_var_weight[k];
180 fix_qtl_weight[last+k]= map->fix_cont_var_weight[k];
181 }
182
183 if (n_continuous_vars>=1 && map->cont_var[0]==EPISTASIS_TERM) {
184 if (n_intervals<2) send(CRASH);
185 n1= n_intervals-2; n2= n_intervals-1; /* the 2 epistatic loci */
186 for (i=0; i<n_qtl_genotypes; i++) {
187 g1=lookup_genotype[i][n1]; g2=lookup_genotype[i][n2];
188 if (g1==A || g2==A) lookup_coded_genotype[i][last]=0.0;
189 else if (g1==H && g2==H) lookup_coded_genotype[i][last]=1.0;
190 else if (g1==B && g2==H) lookup_coded_genotype[i][last]=2.0;
191 else if (g1==H && g2==B) lookup_coded_genotype[i][last]=2.0;
192 else /* (g1==B && g2==B) */ lookup_coded_genotype[i][last]=4.0;
193
194 }
195 epistasis_kludge= TRUE; n_continuous_vars-= 1;
196 last+=1; n_genotype_vars= last;
197 }
198
199 /* Mu term - with or without epistasis */
200 qctm_qtl_weight[last+n_continuous_vars]=map->mu;
201 fix_qtl_weight[last+n_continuous_vars]= DONT_FIX;
202
203 } else send(CRASH);
204 }
205
206
fill_in_qtl_map(map,new_like,qtl_weight)207 void fill_in_qtl_map(map,new_like,qtl_weight)
208 QTL_MAP *map;
209 real new_like, *qtl_weight;
210 {
211 int j, k, n, num, last;
212 real a, b, c;
213
214 map->abs_log_like= new_like;
215 map->log_like= new_like - map->null_log_like;
216 map->var_explained= 1.0 - (map->sigma_sq/map->null_sigma_sq);
217 map->chi_sq= 2.0 * map->log_like * log(10.0);
218
219 if (raw.data_type==BACKCROSS) {
220 for (j=0; j<n_intervals; j++) {
221 map->qtl_weight[j]= qtl_weight[j];
222 map->qtl_dominance[j]= 0.0;
223 }
224 for (k=0; k<n_continuous_vars; k++) {
225 map->cont_var_weight[k]= qctm_qtl_weight[n_intervals+k];
226 }
227
228 } else if (raw.data_type== INTERCROSS) {
229 for (j=0, n=0; j<n_intervals; j++, n+=2) {
230 a=map->constraint[j].a;
231 b=map->constraint[j].b;
232 c=map->constraint[j].c;
233
234 map->qtl_weight[j]= qtl_weight[n];
235 if (b==0.0 || fix_weight_kludge)
236 map->qtl_dominance[j]= qtl_weight[n+1];
237 else
238 map->qtl_dominance[j]= (c - a*qtl_weight[n])/b;
239 }
240
241 last= (!epistasis_kludge ? n_genotype_vars: n_genotype_vars-1);
242 num= (!epistasis_kludge ? n_continuous_vars: n_continuous_vars+1);
243 for (k=0; k<num; k++) { /* with or without epistasis */
244 map->cont_var_weight[k]= qctm_qtl_weight[last+k];
245 }
246
247 } else send(CRASH);
248 }
249
250
251 /************************** QTL_CONV_TO_MAP (QCTM) **************************/
252
qtl_conv_to_map(data,map)253 void qtl_conv_to_map(data,map)
254 DATA *data;
255 QTL_MAP *map; /* many parts of this are side-effected */
256 {
257 real old_like, new_like;
258 int i;
259 bool done;
260 /* use externs null_qtl_weight, qctm_qtl_weight, qctm_qtl_pos */
261
262 set_qctm_globals(data,map);
263 map->chi_sq= map->var_explained= map->log_like= 0.0;
264
265 /*** BAG_QCTM - SPEED THINGS UP FOR DEBUGGING OTHER PARTS OF THE CODE ***/
266 if (bag_qctm) {
267 map->log_like= map->abs_log_like= map->chi_sq= 1.0;
268 map->null_log_like= map->no_data_like= 0.0; map->var_explained= 0.001;
269 return;
270 }
271
272 /*** COMPUTE THE NULL QTL MAPS ***/
273 map->null_log_like=
274 E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq,
275 data,expected_genotype,S_matrix,expected_recs);
276 map->no_data_like= 0.0;
277 /* was no_data_like(data,qctm_qtl_weight,mu,sigma_sq) - see Obsolete.c */
278 if (print_iter) print_null_iteration(map);
279
280 /*** COMPUTE THE ACTUAL QTL MAP ***/
281 new_like= VERY_UNLIKELY; i=0; done=FALSE;
282 do {
283 old_like=new_like;
284 new_like=
285 E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data,
286 expected_genotype,S_matrix,expected_recs);
287
288 if ((fabs(new_like-old_like) < like_tolerance) ||
289 (map->sigma_sq < SMALL_VARIANCE) ||
290 (fix_weight_kludge && i==200)) done=TRUE; /* converged! */
291 else if (new_like < old_like) {
292 print("*** warning: likelihood decreased, quitting...\n");
293 done=TRUE;
294 }
295
296 if (print_iter && (!done || print_rec_mat)) {
297 fill_in_qtl_map(map,new_like,qctm_qtl_weight);
298 print_iteration(i,map,(i==0 ? 0.0 : new_like-old_like));
299 if (print_rec_mat)
300 do_print_E_step(expected_genotype,S_matrix,expected_recs,
301 n_individuals,n_genotype_vars,n_continuous_vars,n_intervals);
302 }
303
304 if (done) break;
305
306 ML_qtl_weight(S_matrix,expected_genotype,data->phenotype,
307 fix_qtl_weight,&map->mu,qctm_qtl_weight,&map->sigma_sq);
308 ML_qtl_pos(expected_recs,data->interval_len,map->fix_pos,map->qtl_pos);
309 i++; /* #iterations counter */
310
311 } while(TRUE); /* break is in the middle of the loop */
312 fill_in_qtl_map(map,new_like,qctm_qtl_weight);
313 }
314
315
316
317 /********************************** E-STEP **********************************/
318
E_step(qtl_pos,qtl_weight,mu,sigma_sq,data,expected_genotype,S_matrix,expected_recs)319 real E_step(qtl_pos,qtl_weight,mu,sigma_sq,data,expected_genotype,
320 S_matrix,expected_recs) /* return the log-likelihood */
321 real *qtl_pos, *qtl_weight;
322 real *mu, *sigma_sq; /* just ptrs to single reals */
323 DATA *data;
324 real **expected_genotype, **S_matrix; /* both side-effected */
325 GENO_PROBS *expected_recs; /* side-effected */
326 {
327 int *poss_genotype;
328 real *genotype_contribution;
329 real sum_exp_genotype, prediction;
330 real like, indiv_like, int_like, sum_int_like, total_log_like;
331 int i, j, n, k; /* use i=individuals; j=intervals; n=genotype_vars */
332 int x, y, z, geno, qtl, last, c, d, entry;
333
334 /* externs side-effected: indiv_S_matrix, indiv_rec_like, trans_prob,
335 and rec_like */
336
337 if (debug_qctm) { /* DEBUGGING CODE */
338 sf(ps,"qtl_pos: %lf\n",qtl_pos[0]); pr();
339 sf(ps,"qtl_weight: %lf\n",qtl_weight[0]); pr();
340 sf(ps,"mu: %lf\n",*mu); pr();
341 sf(ps,"sigma_sq: %lf\n",*sigma_sq); pr();
342 }
343
344 /*** Init expected_genotype, expected_recs, S_matrix and rec_probs ***/
345 for (j=0; j<n_intervals; j++)
346 for_interval_genotypes(raw.data_type,geno)
347 for_locus_genotypes(raw.data_type,qtl)
348 expected_recs[j][geno][qtl]= 0.0;
349 for (n=0; n<n_genotype_vars; n++) {
350 for (k=0; k<n_genotype_vars; k++) S_matrix[n][k]= 0.0;
351 for (i=0; i<n_individuals; i++) expected_genotype[i][n]= 0.0;
352 }
353 make_rec_probs(qtl_pos,data->interval_len,trans_prob);
354
355 /*** Now compute expected genotypes, recs, S_matrix and likelihood ***/
356 total_log_like = 0.0;
357 for (i=0; i<n_individuals; i++) {
358
359 if (debug_qctm) {
360 sf(ps,"===== indiv %d \n",i); print(ps);
361 for (j=0; j<n_intervals; j++) {
362 sf(ps,"interval %d probs:\n",j); pr(); z=0;
363 for_interval_genotypes(raw.data_type,y) {
364 sf(ps,"%2d %lf ",y,data->genotype_prob[i][j][y]); pr(); z++;
365 if (z%4==0 && y!=0) nl();
366 }
367 if (z%4!=0) nl();
368 }
369 }
370
371 /*** For each indiv we calculate indiv_like, indiv_rec_like and
372 indiv_S_matrix. These are then used in the running computation of
373 expected_genotype, S_matrix, expected_recs and the log like.
374 First, we initialize the indiv... variables. ***/
375
376 indiv_like=0.0; sum_int_like=0.0;
377 for (j=0; j<n_intervals; j++)
378 for_interval_genotypes(raw.data_type,geno)
379 for_locus_genotypes(raw.data_type,qtl)
380 indiv_rec_like[j][geno][qtl]= 0.0;
381 for (n=0; n<n_genotype_vars; n++)
382 for (k=0; k<=n; k++) indiv_S_matrix[n][k]= 0.0;
383
384 /*** Now we loop over all possible qtl-genotypes, getting a like and
385 the rec_like[][][] for each. These are mashed into indiv_like,
386 indiv_rec_like and indiv_S_matrix. ***/
387
388 for (x=0; x<n_qtl_genotypes; x++) {
389 poss_genotype= lookup_genotype[x];
390 genotype_contribution= lookup_coded_genotype[x];
391
392 /* this sets like, int_like, and rec_like */
393 likelihood(poss_genotype,genotype_contribution,trans_prob,
394 qtl_weight,mu,sigma_sq,data,i,&like,&int_like,rec_like);
395
396 indiv_like+=like;
397 sum_int_like+=int_like;
398 for (j=0; j<n_intervals; j++)
399 for (y=0; y<data->num_genotypes[i][j]; y++) {
400 geno= data->genotype[i][j][y];
401 indiv_rec_like[j][geno][poss_genotype[j]]+=rec_like[j][geno];
402 }
403 for (n=0; n<n_genotype_vars; n++) {
404 expected_genotype[i][n]+= genotype_contribution[n] * like;
405 for (k=0; k<=n; k++) indiv_S_matrix[n][k]+=
406 genotype_contribution[n] * genotype_contribution[k] * like;
407 }
408 } /* end of for possible_genotypes (x<n_qtl_genotypes) */
409
410 /* For regression on cont-vars alone, n_qtl_genotypes==0 and thus
411 likelihood() never gets called. Thus, we need to calculate the
412 indiv_like here... */
413
414 /*** For this indiv, we now normalize the indiv_rec_like and
415 expected_genotype, then we use this to calculate expected_recs,
416 S_matrix, and total_log_like. ***/
417
418 if (n_genotype_vars>0) {
419 if (fabs(sum_int_like-1.0)>0.01)
420 print("*** warning: sum_int_like in E_step is not 1.0\n");
421 if (indiv_like > VERY_SMALL) {
422 total_log_like+= log10(indiv_like);
423
424 for (j=0; j<n_intervals; j++)
425 for_interval_genotypes(raw.data_type,geno)
426 for_locus_genotypes(raw.data_type,qtl)
427 indiv_rec_like[j][geno][qtl]/= indiv_like;
428 for (n=0; n<n_genotype_vars; n++) {
429 expected_genotype[i][n]/= indiv_like;
430 for (k=0; k<=n; k++)
431 S_matrix[n][k]+= indiv_S_matrix[n][k]/indiv_like;
432 }
433 for (j=0; j<n_intervals; j++)
434 for_interval_genotypes(raw.data_type,geno)
435 for_locus_genotypes(raw.data_type,qtl)
436 expected_recs[j][geno][qtl]+=
437 indiv_rec_like[j][geno][qtl];
438 } /* end of if (indiv_like > VERY_SMALL) */
439
440 } else { /* n_genotype_vars==0 */
441 for (prediction= *mu, c=0; c<n_continuous_vars; c++)
442 prediction+= data->cont_var[c][i] * qtl_weight[c];
443 indiv_like=
444 normal_density((data->phenotype[i]-prediction),sigma_sq);
445 if (indiv_like > VERY_SMALL) total_log_like+=log10(indiv_like);
446 }
447
448 if (debug_qctm) { /* DEBUGGING CODE */
449 int g, q;
450 print("expected_geno: "); for (n=0; n<n_genotype_vars; n++)
451 { sf(ps,"%lf ",expected_genotype[i][n]); print(ps); } nl();
452 sf(ps,"indiv_like: %lf indiv_rec_like:\n",indiv_like); print(ps);
453 for (j=0; j<n_intervals; j++) {
454 for_locus_genotypes(raw.data_type,q) {
455 for_interval_genotypes(raw.data_type,g)
456 { sf(ps,"[%d][%d] ",g,q); pr(); } nl();
457 for_interval_genotypes(raw.data_type,g)
458 { sf(ps,"%7.5lf ",indiv_rec_like[j][g][q]); pr(); } nl();
459 }
460 }
461 }
462
463 } /* end of for i (individuals) */
464
465
466 /*** Finally, we fill in the upper right triangle of the S_matrix and
467 the constant entries of the S_matrix and the expected_genotypes.
468 These entries correspond to the continuous variables and the base
469 trait value (mu) in the regression equation, and are used by
470 ML_qtl_weight(), but not ML_qtl_pos(). ***/
471
472 for (n=0; n<n_genotype_vars; n++) /* fill in upper right */
473 for (k=0; k<n; k++) S_matrix[k][n]= S_matrix[n][k];
474 last= n_genotype_vars + n_continuous_vars;
475
476 for (i=0; i<n_individuals; i++) expected_genotype[i][last]= 1.0;
477
478 for (c=0; c<n_continuous_vars; c++) {
479 entry=n_genotype_vars+c;
480 for (i=0; i<n_individuals; i++)
481 expected_genotype[i][entry]= data->cont_var[c][i];
482
483 for (n=0; n<n_genotype_vars; n++) {
484 for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
485 sum_exp_genotype+=data->cont_var[c][i] * expected_genotype[i][n];
486 S_matrix[n][entry]=S_matrix[entry][n]= sum_exp_genotype;
487 }
488
489 for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
490 sum_exp_genotype+=data->cont_var[c][i];
491 S_matrix[entry][last]=S_matrix[last][entry]= sum_exp_genotype;
492
493 x=n_genotype_vars+c;
494 for (d=0; d<=c; d++) {
495 y=n_genotype_vars+d;
496 for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
497 sum_exp_genotype+=data->cont_var[c][i] * data->cont_var[d][i];
498 S_matrix[x][y]= S_matrix[y][x]= sum_exp_genotype;
499 }
500 }
501
502 for (n=0; n<last; n++) { /* for all genotype_vars and continuous_vars */
503 for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
504 sum_exp_genotype+= expected_genotype[i][n]; /* sum for all indivs */
505 S_matrix[last][n]= S_matrix[n][last]= sum_exp_genotype; /* mu entry */
506 }
507 S_matrix[last][last]= (real)(n_individuals);
508
509 return(total_log_like);
510 }
511
512
likelihood(qtl_genotype,contribution,trans_prob,qtl_weight,mu,sigma_sq,data,indiv,total_like,total_int_like,rec_like)513 void likelihood(qtl_genotype,contribution,trans_prob,qtl_weight,mu,sigma_sq,
514 data,indiv,total_like,total_int_like,rec_like)
515 int *qtl_genotype;
516 real *contribution;
517 GENO_PROBS *trans_prob;
518 real *qtl_weight;
519 real *mu, *sigma_sq; /* mu and sigma_sq are pointers to single numbers */
520 DATA *data;
521 int indiv;
522 real *total_like; /* side-effected - is a ptr to a single num */
523 real *total_int_like; /* side-effected - is also a ptr to a single num */
524 INTERVAL_GENOTYPE_PROBS *rec_like; /* [interval][int-geno] - side-effected */
525 {
526 real normal_like, prediction, ratio;
527 int j, y, n, c, geno;
528
529 /* Some elements of the data struct we use in here... */
530 INTERVAL_GENOTYPE_PROBS *indiv_prob= data->genotype_prob[indiv];
531 int *num_int_genos= data->num_genotypes[indiv];
532 INT_POSS_GENOTYPES *poss_geno= data->genotype[indiv];
533 /* poss_geno[interval#][poss-geno#]=> genotype code */
534
535 /* int_like is used as a temp in here */
536
537 *total_int_like= 1.0;
538 for (j=0; j<n_intervals; j++) {
539 int_like[j]= 0.0;
540 /* for_interval_genotypes(data_type,geno) { EFFECTIVELY */
541 for (y=0; y<num_int_genos[j]; y++) {
542 geno= poss_geno[j][y];
543 int_like[j]+= rec_like[j][geno]=
544 trans_prob[j][geno][qtl_genotype[j]] * indiv_prob[j][geno];
545 }
546 *total_int_like *= int_like[j];
547 }
548
549 for (n=0, prediction= *mu; n<n_genotype_vars; n++)
550 prediction+= contribution[n] * qtl_weight[n];
551 for (c=0; c<n_continuous_vars; c++) /* ADDED for cont vars */
552 prediction+= data->cont_var[c][indiv] * qtl_weight[n_genotype_vars+c];
553 normal_like= normal_density((data->phenotype[indiv]-prediction),sigma_sq);
554 *total_like= *total_int_like * normal_like;
555
556 for (j=0; j<n_intervals; j++) {
557 ratio= (*total_like>VERY_SMALL ? *total_like/int_like[j] : 0.0);
558 for (y=0; y<num_int_genos[j]; y++) { /* for_interval_genotypes y */
559 geno= poss_geno[j][y];
560 rec_like[j][geno] *= ratio;
561 }
562 }
563
564 if (debug_qctm) { /* DEBUGGING CODE */
565 int x;
566 x=(raw.data_type==INTERCROSS ? 2:1);
567 for (j=0; j<n_intervals; j++) {
568 sf(ps,"poss_geno=%d cont=(%3.1lf %3.1lf) int_like=%lf rec_like:\n",
569 qtl_genotype[j],contribution[x*j],contribution[x*j+1],int_like[j]);
570 pr();
571 }
572 sf(ps,"total_int_like=%lf norml=%lf total=%lf\n=====\n",
573 *total_int_like,normal_like,*total_like); pr();
574 }
575
576 }
577
578
make_rec_probs(qtl_pos,interval_rf,trans_prob)579 void make_rec_probs(qtl_pos,interval_rf,trans_prob)
580 real *qtl_pos, *interval_rf;
581 GENO_PROBS *trans_prob;
582 {
583 int j, geno, qtl, left, right;
584 real left_rf, right_rf, sum;
585
586 for (j=0; j<n_intervals; j++) {
587 left_rf=
588 rmaxf(MIN_REC_FRAC,min(qtl_pos[j],MAX_FRAC_OF_RF*interval_rf[j]));
589 right_rf= (interval_rf[j] - left_rf)/(1 - 2*left_rf);
590 for_interval_genotypes(raw.data_type,geno) {
591 sum= 0.0;
592 left= left_genotype[geno];
593 right= right_genotype[geno];
594 for_locus_genotypes(raw.data_type,qtl) {
595 sum+= trans_prob[j][geno][qtl]=
596 (transition_prob(raw.data_type,left,qtl,left_rf) *
597 transition_prob(raw.data_type,qtl,right,right_rf)) /
598 transition_prob(raw.data_type,left,right,interval_rf[j]);
599 }
600 if (fabs(sum-1.0)>0.01)
601 print("*** warning: the sum of the trans_probs is not 1.0\n");
602 }
603 }
604
605 if (debug_qctm) {
606 for (j=0; j<n_intervals; j++) {
607 sf(ps,"rec_probs for interval %d rf=%lf\n",j,interval_rf[j]); pr();
608 for_interval_genotypes(raw.data_type,geno) {
609 sf(ps,"%2d ",geno); pr();
610 for_locus_genotypes(raw.data_type,qtl)
611 { sf(ps,"[%d] %lf ",qtl,trans_prob[j][geno][qtl]); pr(); } nl();
612 }
613 }
614 }
615
616 }
617
618
normal_density(x,sigma_sq)619 real normal_density(x,sigma_sq)
620 real x,*sigma_sq;
621 {
622 real exponent;
623
624 if (*sigma_sq < TINY_VARIANCE) {
625 print("warning: sigma_sq exceeds tiny_variance...\n");
626 *sigma_sq= TINY_VARIANCE;
627 }
628 exponent = ((x*x)/(2.0 * *sigma_sq))
629 + (0.5 * (log(*sigma_sq))) + log(SQRT_2_PI);
630
631 if (exponent < BIGGEST_EXPONENT) return(exp(-exponent));
632 else return(TINY_LIKELIHOOD);
633 }
634
635
636
637 /******************** ML_QTL_WEIGHT ********************/
638
ML_qtl_weight(S_matrix,expected_genotype,phenotype,fix_weight,mu,qtl_weight,sigma_sq)639 void ML_qtl_weight(S_matrix,expected_genotype,phenotype,fix_weight,
640 mu,qtl_weight,sigma_sq)
641 real **S_matrix, **expected_genotype, *phenotype, *fix_weight;
642 real *mu, *qtl_weight, *sigma_sq; /* side effect these three */
643 {
644 int n, size, i, j;
645 real total, prediction;
646 /* S_inverse and row are side-effected */
647
648 size= n_genotype_vars+n_continuous_vars+1;
649
650
651 if (!fix_weight_kludge) {
652 for (n=0; n<size; n++)
653 if (fix_weight[n]!=DONT_FIX) kill_entry(S_matrix,size,n);
654
655 mat_invert(S_matrix,size,S_inverse);
656 array_times_matrix(phenotype,expected_genotype,n_individuals,size,
657 temp_row);
658 array_times_matrix(temp_row,S_inverse,size,size,qtl_weight);
659
660 for (n=0; n<size; n++)
661 if (fix_weight[n]!=DONT_FIX) qtl_weight[n]=fix_weight[n];
662 *mu= qtl_weight[size-1]; /* last entry is mu */
663
664 } else { /* if fix_weight_kludge */
665 for (n=0; n<size-1; n++) qtl_weight[n]= fix_weight[n];
666 for (i=0, total=0.0; i<n_individuals; i++) {
667 for (n=0, prediction= 0.0; n<size-1; n++)
668 prediction+= qtl_weight[n] * expected_genotype[i][n];
669 total+= phenotype[i] - prediction;
670 }
671 *mu= total/((real)n_individuals);
672 }
673
674 *sigma_sq=
675 variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size);
676 }
677
678
kill_entry(S_matrix,size,i)679 void kill_entry(S_matrix,size,i)
680 real **S_matrix;
681 int size, i; /* index of entry to kill */
682 {
683 int n;
684
685 for (n=0; n<size; n++) S_matrix[i][n]= S_matrix[n][i]= 0.0;
686 S_matrix[i][i]= 1.0;
687 }
688
689
variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size)690 real variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size)
691 real *phenotype, *qtl_weight, **expected_genotype, **S_matrix, *mu;
692 int size;
693 {
694 int i, n, m;
695 real var, prediction, term1, term2, pheno;
696
697 term1= term2= 0.0;
698
699 for (i=0; i<n_individuals; i++) {
700 for (n=0, prediction= 0.0; n<size-1; n++)
701 prediction+= qtl_weight[n] * expected_genotype[i][n];
702 pheno = phenotype[i] - *mu;
703 term1+= pheno*pheno - 2.0*pheno*prediction;
704 }
705
706 for (n=0; n<size-1; n++) for (m=0; m<size-1; m++)
707 term2+= qtl_weight[n] * qtl_weight[m] * S_matrix[n][m];
708
709 var = (term1 + term2) / ((real)n_individuals);
710 return(var);
711 }
712
713
714 /******************** ML_QTL_POS ********************/
715
716 #define DIVS 100
717 #define STEP 0.02
718
ML_qtl_pos(expected_recs,interval_rf,fix_pos,qtl_pos)719 void ML_qtl_pos(expected_recs,interval_rf,fix_pos,qtl_pos)
720 GENO_PROBS *expected_recs;
721 real *interval_rf;
722 real *fix_pos; /* []==DONT_FIX if we don't want to fix it */
723 real *qtl_pos; /* side-effected */
724 {
725 int i;
726 real start, end, interval_cm, pos, max_cm, min_cm;
727
728 for (i=0; i<n_intervals; i++) {
729
730 if (fix_pos[i]!=DONT_FIX) qtl_pos[i]=fix_pos[i];
731
732 else if (brute_force) {
733 /***** Brute force computation to find a maximum ******/
734
735 /* First pass */
736 interval_cm= haldane_cm(interval_rf[i]);
737 min_cm= start= MIN_CM;
738 max_cm= end= haldane_cm(MAX_FRAC_OF_RF*interval_rf[i]);
739 pos= do_brute_force(expected_recs,i,interval_rf[i],start,end,DIVS);
740
741 /* Second pass */
742 start= rmaxf(pos-STEP*interval_cm,min_cm);
743 end= rminf(pos+STEP*interval_cm,max_cm);
744 pos= do_brute_force(expected_recs,i,interval_rf[i],start,end,DIVS);
745
746 qtl_pos[i]= unhaldane_cm(pos);
747 } else /* !brute_force */ send(CRASH);
748 } /* for i (all intervals) */
749 }
750
751
do_brute_force(expected_recs,i,interval_rf,start,end,steps)752 real do_brute_force(expected_recs,i,interval_rf,start,end,steps)
753 /* return the best pos in cM */
754 GENO_PROBS *expected_recs;
755 int i; /* interval# */
756 real interval_rf, start, end; /* start and end are positions in cM */
757 int steps;
758 {
759 real inc, pos, like, max_like, best_pos;
760 int j;
761
762 inc=(end-start)/((real)(steps-1)); max_like= -VERY_BIG;
763 for (j=0, pos=start; j<steps; j++, pos+=inc) {
764 like= pos_like(unhaldane_cm(pos),interval_rf,expected_recs,i);
765 if (like>max_like) { max_like=like; best_pos=pos; }
766 }
767 return(best_pos);
768 }
769
770
pos_like(left_theta,interval_theta,expected_recs,i)771 real pos_like(left_theta,interval_theta,expected_recs,i)
772 GENO_PROBS *expected_recs;
773 real left_theta, interval_theta;
774 int i; /* the interval# */
775 {
776 real event_like, pos_like, right_theta;
777 int interval_geno, left_geno, right_geno, qtl_geno;
778
779 right_theta= (interval_theta-left_theta)/(1.0-2.0*left_theta);
780 pos_like=0.0;
781
782 for_interval_genotypes(raw.data_type,interval_geno) {
783 left_geno= left_genotype[interval_geno];
784 right_geno= right_genotype[interval_geno];
785 for_locus_genotypes(raw.data_type,qtl_geno) {
786 event_like= apriori_prob(raw.data_type,left_geno) *
787 transition_prob(raw.data_type,left_geno,qtl_geno,left_theta) *
788 transition_prob(raw.data_type,qtl_geno,right_geno,right_theta);
789 pos_like+=
790 log(event_like) * expected_recs[i][interval_geno][qtl_geno];
791 }
792 }
793 return(pos_like);
794 }
795
796
797 /* real left_recs, right_recs, left_norecs, right_norecs;
798 left_recs= left_norecs= right_recs= right_norecs= 0.0;
799 if (raw.data_type!=BACKCROSS) send(CRASH);
800 if (left_geno==qtl_geno)
801 left_norecs+= expected_recs[i][interval_geno][qtl_geno];
802 else left_recs+= expected_recs[i][interval_geno][qtl_geno];
803 if (right_geno==qtl_geno)
804 right_norecs+= expected_recs[i][interval_geno][qtl_geno];
805 else right_recs+= expected_recs[i][interval_geno][qtl_geno];
806 like= left_recs*log(left_theta) + left_norecs*log(1.0-left_theta) +
807 + right_recs*log(right_theta) + right_norecs*log(1.0-right_theta); */
808
809 #ifdef VERY_OLD_CODE
810
811 start= 0.0;
812 inc= (haldane_cm(interval_rf[i])/(DIVS-1));
813 if (print_brute_force) {
814 sf(ps,PrBF1,i,interval_rf[i],qtl_pos[i]);
815 print(ps);
816 }
817 max_pos=do_brute_force(start,inc,interval_rf[i],
818 expected_recs,i,print_brute_force,&unimodal);
819
820 if (!unimodal) {
821 print("*** warning: pos_likes is not unimodal...\n");
822 qtl_pos[i]= max_pos;
823 } else { /* unimodal */
824 /* Do it again, with interval= max_pos+/-inc */
825 start= haldane_cm(max_pos)-inc; /* rrange takes care */
826 inc= (2*inc)/(DIVS-1); /* of fencepost error */
827 /* if (print_brute_force) { print("second pass:\n"); } */
828 qtl_pos[i]=do_brute_force(start,inc,interval_rf[i],
829 expected_recs,i,FALSE,&unimodal);
830 }
831
832 if (print_iter && !print_brute_force) {
833 sf(ps,PrI,i,interval_rf[i],
834 guess_pos(expected_recs,i,interval_rf[i]),
835 qtl_pos[i]); print(ps);
836 }
837
do_brute_force(start,inc,theta,expected_recs,interval,do_print,unimodal)838 real do_brute_force(start,inc,theta,expected_recs,interval,do_print,unimodal)
839 real start, inc, theta;
840 GENO_PROBS *expected_recs;
841 int interval, do_print, *unimodal;
842 {
843 real pos, cm, max_pos, max_like, d, d2, f, prev_f;
844 int j, max_j, dip, got_f;
845
846 max_like= VERY_UNLIKELY; dip= FALSE; *unimodal=TRUE;
847 for (j=0, cm=start; j<DIVS; j++,cm+=inc) {
848 pos=unhaldane_cm(cm);
849 if (!rrange(&pos, MIN_REC_FRAC, MAX_FRAC_OF_RF*theta)) continue;
850 pos_likes(pos,expected_recs,interval,theta,&d,&d2,&f);
851 if (do_print) {
852 sf(ps,PrBF2,pos,f,dip);print(ps);
853 }
854 if (f>max_like) {
855 max_like=f; max_pos=pos; max_j=j;
856 if (dip) *unimodal= FALSE;
857 } else if (f<max_like) dip= TRUE;
858 }
859 if (do_print) {
860 sf(ps,PrBF3,max_pos,max_like);
861 print(ps);
862 }
863 if (max_like==VERY_UNLIKELY) { send(CRASH); }
864 return(max_pos);
865 }
866
867
pos_likes(lambda,expected_recs,i,theta,deriv,deriv2,f)868 void pos_likes(lambda,expected_recs,i,theta,deriv,deriv2,f)
869 real lambda;
870 GENO_PROBS *expected_recs;
871 int i;
872 real theta;
873 real *deriv, *deriv2, *f;
874 {
875 real w1,x1,y1,z1,w2,x2,y2,z2;
876 real lambda_star, one_minus_lambda, one_minus_2_lambda;
877 real sq_one_minus_2_lambda, theta_minus_lambda;
878 real one_minus_theta_lambda;
879 real nr1, nr2, r_1, r_2;
880
881 rrange(&lambda,0.001,0.499);
882
883 r_1= expected_recs[i][REC][REC] + expected_recs[i][REC][NOREC];
884 r_2= expected_recs[i][REC][REC] + expected_recs[i][NOREC][REC];
885 nr1= expected_recs[i][NOREC][REC] + expected_recs[i][NOREC][NOREC];
886 nr2= expected_recs[i][REC][NOREC] + expected_recs[i][NOREC][NOREC];
887
888 lambda_star= (theta-lambda)/(1.0-2.0*lambda);
889 *f= r_1*log(lambda) + r_2*log(lambda_star) +
890 nr2*log(1.0-lambda_star) + nr1*log(1.0-lambda);
891 /*
892 one_minus_lambda= 1.0 - lambda;
893 one_minus_2_lambda= 1.0 - (2.0*lambda);
894 sq_one_minus_2_lambda= sq(one_minus_2_lambda);
895 theta_minus_lambda= theta - lambda;
896 one_minus_theta_lambda= 1.0 - theta - lambda;
897
898 w1= r_1 / lambda;
899 x1= r_2 * (2.0/one_minus_2_lambda - 1.0/theta_minus_lambda);
900 y1= nr2 * (-(2.0*lambda_star-1.0) / one_minus_theta_lambda);
901 z1= nr1 * (-1.0 / one_minus_lambda);
902
903 w2= -r_1 / sq(lambda);
904 x2= r_2 * (4.0/sq_one_minus_2_lambda - 1.0/sq(theta_minus_lambda));
905 y2= -nr2 * ((-2.0/sq_one_minus_2_lambda) +
906 (4.0*theta - 2.0)/
907 (sq(one_minus_theta_lambda) * one_minus_2_lambda));
908 z2= nr1 * (-1.0/sq(one_minus_lambda));
909
910 *deriv= w1 + x1 + y1 + z1;
911 *deriv2= w2 + x2 + y2 + z2;
912 */
913 }
914
guess_pos(expected_recs,i,theta_interval)915 real guess_pos(expected_recs,i,theta_interval)
916 GENO_PROBS *expected_recs;
917 int i;
918 real theta_interval;
919 {
920 real total_recs, left_recs, right_recs, L, R, X;
921 real left_cm, right_cm, cm_pos, pos;
922 int left, right;
923
924 total_recs= left_recs= right_recs= 0.0;
925 {
926 L= ((real)left); R= ((real)right);
927 X= expected_recs[i][left][right];
928 left_recs+= L * X;
929 right_recs+= R * X;
930 total_recs+= (L+R) * X;
931 }
932
933 left_cm= haldane_cm(rminf(left_recs/total_recs, 0.49));
934 right_cm= haldane_cm(rminf(right_recs/total_recs,0.49));
935 cm_pos = haldane_cm(theta_interval) * (left_cm /(left_cm +right_cm));
936 pos = unhaldane_cm(cm_pos);
937 return(pos);
938 }
939
940 #endif
941
qtl_noconv_to_map(data,map)942 void qtl_noconv_to_map(data,map) /* OBSOLETE, I THINK */
943 DATA *data;
944 QTL_MAP *map; /* many parts of this are side-effected */
945 {
946 real old_like, new_like;
947 int i;
948 bool done;
949 /* use externs null_qtl_weight, qctm_qtl_weight, qctm_qtl_pos */
950
951 set_qctm_globals(data,map);
952 map->chi_sq= map->var_explained= map->log_like= 0.0;
953
954 /*** COMPUTE THE NULL QTL MAPS ***/
955 map->null_log_like=
956 E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq,
957 data,expected_genotype,S_matrix,expected_recs);
958 map->no_data_like= 0.0;
959
960 /*** COMPUTE THE ACTUAL QTL MAP ***/
961 /* qctm_qtl_weight[] was initialized from the map->qtl_weight[] and
962 map->qtl_dominance[] entries by set_qctm_globals() */
963 new_like=
964 E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data,
965 expected_genotype,S_matrix,expected_recs);
966 fill_in_qtl_map(map,new_like,qctm_qtl_weight);
967 }
968
969
970
971 #ifdef DEBUGGING_CODE
972 printf("Expected Genotype:\n");
973 for (i=0; i<n_individuals; i++) {
974 printf("%d: ",i);
975 for (j=0; j<size; j++) printf("%8.5lf ",expected_genotype[i][j]);
976 printf("\n");
977 }
978 printf("\nS Matrix:\n");
979 for (i=0; i<size; i++) {
980 printf("%d: ",i);
981 for (j=0; j<size; j++) printf("%8.5lf ",S_matrix[i][j]);
982 printf("\n");
983 }
984 #endif
985
986