1 /******************************************************************************
2 
3   ####    ####    #####  #    #           ####
4  #    #  #    #     #    ##  ##          #    #
5  #    #  #          #    # ## #          #
6  #  # #  #          #    #    #   ###    #
7  #   #   #    #     #    #    #   ###    #    #
8   ### #   ####      #    #    #   ###     ####
9 
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12    for Biomedical Research. All rights reserved. See READ.ME for license. */
13 
14 #define INC_LIB
15 #define INC_MISC
16 #define INC_CALLQCTM
17 #define INC_QLOWLEVEL
18 #include "qtl.h"
19 
20 /* Globals available to the outside world: */
21 real like_tolerance, pos_tolerance, mat_tolerance;
22 bool print_iter, print_rec_mat, bag_qctm, brute_force, print_brute_force;
23 bool debug_newton;
24 int  max_intervals, max_genotype_vars, max_continuous_vars;
25 /* functions alloc_qctm_globals(), free_qctm_globals(),
26    qctm_globals_avail(), qctm_init(), qtl_conv_to_map(); */
27 
28 /* Local to this file: */
29 void qctm();
30 void set_qctm_globals();
31 void fill_in_qtl_map();
32 
33 real E_step();
34 void likelihood();
35 void make_rec_probs();
36 
37 void ML_qtl_weight();
38 void kill_entry();
39 
40 void ML_qtl_pos();
41 real do_brute_force();
42 real pos_like();
43 
44 real variance();
45 real normal_density();
46 
47 bool debug_qctm;
48 int n_individuals, n_intervals, n_qtl_genotypes;
49 int n_genotype_vars, n_continuous_vars;
50 
51 /* WARNING: Make sure that the (wrong) globals from qtop.c,
52    num_continuous_vars and num_intervals, aren't used in this file! */
53 
54 /* All of these variables are set & alloced by routines in qdata.c */
55 int max_interx_genotypes, max_backx_genotypes;
56 int **lookup_genotype;                   /* [bit-vect][interval#]=> A,B,H */
57 real **lookup_coded_genotype;            /* [bit-vect][geno-var#]=> real# */
58 
59 /* State variables for the QCTM E-M loop */
60 real **expected_genotype;		 /* [indivs][geno_vars+1] */
61 real *int_like;	                         /* [intervals] */
62 real **S_matrix, **S_inverse;         	 /* [geno_vars+1][geno_vars+1] */
63 real **indiv_S_matrix;                	 /* ditto */
64 real *qctm_qtl_pos;  	                 /* [intervals] */
65 real *null_qtl_weight, *qctm_qtl_weight; /* [geno_vars+1] */
66 real *fix_qtl_weight, *temp_row;	 /* [geno_vars+1] see ML_qtl_weight */
67 GENO_PROBS *indiv_rec_like;              /* [interval][int-geno][qtl-geno] */
68 GENO_PROBS *expected_recs, *trans_prob;	 /* ditto */
69 INTERVAL_GENOTYPE_PROBS *rec_like;       /* [interval][int-geno] */
70 bool epistasis_kludge;
71 
72 #define SMALL_VARIANCE 	 1e-5
73 #define TINY_VARIANCE 	 0.001
74 #define TINY_LIKELIHOOD  1e-40
75 #define SQRT_2_PI 	 2.506628
76 #define BIGGEST_EXPONENT 1e20
77 #define NOREC 0
78 #define REC 1
79 #define RECS 2
80 
81 
82 
83 
84 /****************************** PRELIMINARIES ******************************/
85 
qctm_init()86 void qctm_init()
87 {
88     max_interx_genotypes= max_backx_genotypes= 0;
89     max_intervals= max_genotype_vars= max_continuous_vars= 0;
90     pos_tolerance= like_tolerance=  mat_tolerance= -1.0;
91     debug_qctm= FALSE;
92 
93     lookup_genotype= NULL; lookup_coded_genotype= NULL;
94     int_like= NULL;    temp_row= NULL;
95     expected_genotype= NULL;
96     S_matrix= S_inverse= indiv_S_matrix= NULL;
97     expected_recs= indiv_rec_like= trans_prob= NULL;
98     rec_like= NULL;
99     qctm_qtl_weight= qctm_qtl_pos= NULL;
100     null_qtl_weight= NULL;
101 }
102 
103 
set_qctm_globals(data,map)104 void set_qctm_globals(data,map)	/* called only by qctm itself */
105 DATA *data;
106 QTL_MAP *map;
107 {
108     int i, j, n, k, last;
109     int n1, n2, g1, g2;
110     real het_additive_coeff, a, b, c;
111 
112     /* debug_qctm=TRUE; */
113 
114     n_individuals= data->num_individuals;
115     n_intervals=   data->num_intervals;
116     n_genotype_vars= (raw.data_type==INTERCROSS ? 2:1) * n_intervals;
117     if (n_intervals==0) n_qtl_genotypes=0; else
118       n_qtl_genotypes= ipow((raw.data_type==INTERCROSS ? 3:2),n_intervals);
119     n_continuous_vars= data->num_continuous_vars;
120     epistasis_kludge= FALSE;
121 
122     /* Paranoia: Check if call to QCTM is OK? */
123     /* The n_inter<n_indiv test is to keep variance() from crashing. */
124 
125     if (like_tolerance<=0.0 || pos_tolerance<=0.0 || mat_tolerance<=0.0 ||
126 	n_intervals>=n_individuals || !qctm_globals_avail() ||
127 	n_intervals>max_intervals || n_continuous_vars>max_continuous_vars)
128       send(CRASH);
129 
130     if (raw.data_type==BACKCROSS) {
131 	/* send(CRASH); Needs to be modified for epistasis and lookups */
132 	for (i=0; i<n_intervals; i++) {
133 	    qctm_qtl_weight[i]= map->qtl_weight[i];
134 	    fix_qtl_weight[i]=  map->constraint[i].backx_weight;
135 	}
136 	for (k=0; k<n_continuous_vars; k++) {
137 	    qctm_qtl_weight[n_intervals+k]= map->cont_var_weight[k];
138 	    fix_qtl_weight[n_intervals+k]=  map->fix_cont_var_weight[k];
139 	}
140 	fix_qtl_weight[n_intervals+n_continuous_vars]= DONT_FIX;
141 
142     } else if (raw.data_type==INTERCROSS) {
143 	for (j=0, n=0; j<n_intervals; j++, n+=2) {
144 	    qctm_qtl_weight[n]= map->qtl_weight[j];
145 	    qctm_qtl_weight[n+1]= map->qtl_dominance[j];
146 
147 	    a=map->constraint[j].a;
148 	    b=map->constraint[j].b;
149 	    c=map->constraint[j].c;
150 
151 	    if (fix_weight_kludge) { /* for scan weight KLUDGE */
152 	        fix_qtl_weight[n]= map->qtl_weight[j]; fix_qtl_weight[n+1]=0.0;
153 	    } else if (a==0.0 && b==0.0) { /* nothing fixed */
154 	        fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= DONT_FIX;
155 	    } else if (b==0.0) { /* fix additive term */
156 		fix_qtl_weight[n]= c/a;      fix_qtl_weight[n+1]= DONT_FIX;
157 	    } else { /* b!=0.0 -> fix dominance term */
158 		fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= c/b;
159 	    }
160 
161 	    het_additive_coeff= (b!=0.0 ? (b-a)/b : 1.0);
162 	    for (i=0; i<n_qtl_genotypes; i++) {
163 		if (lookup_genotype[i][j]==A) {
164 		    lookup_coded_genotype[i][n]=   0.0;
165 		    lookup_coded_genotype[i][n+1]= 0.0;
166 		} else if (lookup_genotype[i][j]==B) {
167 		    lookup_coded_genotype[i][n]=   2.0;
168 		    lookup_coded_genotype[i][n+1]= 0.0;
169 		} else if (lookup_genotype[i][j]==H) {
170 		    lookup_coded_genotype[i][n]= het_additive_coeff;
171 		    lookup_coded_genotype[i][n+1]= 1.0;
172 		}
173 	    }
174 	}
175 
176 	last= n_genotype_vars; /* actually it's the 1st unused genotype var */
177 
178 	for (k=0; k<n_continuous_vars; k++) { /* Includes epistasis term */
179 	    qctm_qtl_weight[last+k]= map->cont_var_weight[k];
180 	    fix_qtl_weight[last+k]=  map->fix_cont_var_weight[k];
181 	}
182 
183 	if (n_continuous_vars>=1 && map->cont_var[0]==EPISTASIS_TERM) {
184 	    if (n_intervals<2) send(CRASH);
185 	    n1= n_intervals-2; n2= n_intervals-1; /* the 2 epistatic loci */
186 	    for (i=0; i<n_qtl_genotypes; i++) {
187 		g1=lookup_genotype[i][n1]; g2=lookup_genotype[i][n2];
188 		if      (g1==A || g2==A)    lookup_coded_genotype[i][last]=0.0;
189 		else if (g1==H && g2==H)    lookup_coded_genotype[i][last]=1.0;
190 		else if (g1==B && g2==H)    lookup_coded_genotype[i][last]=2.0;
191 		else if (g1==H && g2==B)    lookup_coded_genotype[i][last]=2.0;
192 		else /* (g1==B && g2==B) */ lookup_coded_genotype[i][last]=4.0;
193 
194 	    }
195 	    epistasis_kludge= TRUE; n_continuous_vars-= 1;
196 	    last+=1; n_genotype_vars= last;
197 	}
198 
199 	/* Mu term - with or without epistasis */
200 	qctm_qtl_weight[last+n_continuous_vars]=map->mu;
201 	fix_qtl_weight[last+n_continuous_vars]= DONT_FIX;
202 
203     } else send(CRASH);
204 }
205 
206 
fill_in_qtl_map(map,new_like,qtl_weight)207 void fill_in_qtl_map(map,new_like,qtl_weight)
208 QTL_MAP *map;
209 real new_like, *qtl_weight;
210 {
211     int j, k, n, num, last;
212     real a, b, c;
213 
214     map->abs_log_like= new_like;
215     map->log_like= new_like - map->null_log_like;
216     map->var_explained= 1.0 - (map->sigma_sq/map->null_sigma_sq);
217     map->chi_sq= 2.0 * map->log_like * log(10.0);
218 
219     if (raw.data_type==BACKCROSS) {
220 	for (j=0; j<n_intervals; j++) {
221 	    map->qtl_weight[j]= qtl_weight[j];
222 	    map->qtl_dominance[j]= 0.0;
223 	}
224 	for (k=0; k<n_continuous_vars; k++) {
225 	    map->cont_var_weight[k]= qctm_qtl_weight[n_intervals+k];
226 	}
227 
228     } else if (raw.data_type== INTERCROSS) {
229 	for (j=0, n=0; j<n_intervals; j++, n+=2) {
230 	    a=map->constraint[j].a;
231 	    b=map->constraint[j].b;
232 	    c=map->constraint[j].c;
233 
234 	    map->qtl_weight[j]= qtl_weight[n];
235 	    if (b==0.0 || fix_weight_kludge)
236 	      map->qtl_dominance[j]= qtl_weight[n+1];
237 	    else
238 	      map->qtl_dominance[j]= (c - a*qtl_weight[n])/b;
239 	}
240 
241 	last= (!epistasis_kludge ? n_genotype_vars:   n_genotype_vars-1);
242 	num=  (!epistasis_kludge ? n_continuous_vars: n_continuous_vars+1);
243 	for (k=0; k<num; k++) { /* with or without epistasis */
244 	    map->cont_var_weight[k]= qctm_qtl_weight[last+k];
245 	}
246 
247     } else send(CRASH);
248 }
249 
250 
251 /************************** QTL_CONV_TO_MAP (QCTM) **************************/
252 
qtl_conv_to_map(data,map)253 void qtl_conv_to_map(data,map)
254 DATA *data;
255 QTL_MAP *map; 	/* many parts of this are side-effected */
256 {
257     real old_like, new_like;
258     int i;
259     bool done;
260     /* use externs null_qtl_weight, qctm_qtl_weight, qctm_qtl_pos */
261 
262     set_qctm_globals(data,map);
263     map->chi_sq= map->var_explained= map->log_like= 0.0;
264 
265     /*** BAG_QCTM - SPEED THINGS UP FOR DEBUGGING OTHER PARTS OF THE CODE ***/
266     if (bag_qctm) {
267 	map->log_like= map->abs_log_like= map->chi_sq= 1.0;
268 	map->null_log_like= map->no_data_like= 0.0; map->var_explained= 0.001;
269 	return;
270     }
271 
272     /*** COMPUTE THE NULL QTL MAPS ***/
273     map->null_log_like=
274       E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq,
275 	     data,expected_genotype,S_matrix,expected_recs);
276     map->no_data_like= 0.0;
277     /* was no_data_like(data,qctm_qtl_weight,mu,sigma_sq) - see Obsolete.c */
278     if (print_iter) print_null_iteration(map);
279 
280     /*** COMPUTE THE ACTUAL QTL MAP ***/
281     new_like= VERY_UNLIKELY; i=0; done=FALSE;
282     do {
283 	old_like=new_like;
284 	new_like=
285 	  E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data,
286 		 expected_genotype,S_matrix,expected_recs);
287 
288 	if ((fabs(new_like-old_like) < like_tolerance) ||
289 	    (map->sigma_sq < SMALL_VARIANCE) ||
290 	    (fix_weight_kludge && i==200)) done=TRUE; /* converged! */
291 	else if (new_like < old_like) {
292 	    print("*** warning: likelihood decreased, quitting...\n");
293 	    done=TRUE;
294 	}
295 
296 	if (print_iter && (!done || print_rec_mat)) {
297 	    fill_in_qtl_map(map,new_like,qctm_qtl_weight);
298 	    print_iteration(i,map,(i==0 ? 0.0 :	new_like-old_like));
299 	    if (print_rec_mat)
300 	      do_print_E_step(expected_genotype,S_matrix,expected_recs,
301 		n_individuals,n_genotype_vars,n_continuous_vars,n_intervals);
302 	}
303 
304 	if (done) break;
305 
306 	ML_qtl_weight(S_matrix,expected_genotype,data->phenotype,
307 	    fix_qtl_weight,&map->mu,qctm_qtl_weight,&map->sigma_sq);
308 	ML_qtl_pos(expected_recs,data->interval_len,map->fix_pos,map->qtl_pos);
309 	i++; /* #iterations counter */
310 
311     } while(TRUE); /* break is in the middle of the loop */
312     fill_in_qtl_map(map,new_like,qctm_qtl_weight);
313 }
314 
315 
316 
317 /********************************** E-STEP **********************************/
318 
E_step(qtl_pos,qtl_weight,mu,sigma_sq,data,expected_genotype,S_matrix,expected_recs)319 real E_step(qtl_pos,qtl_weight,mu,sigma_sq,data,expected_genotype,
320 	    S_matrix,expected_recs) /* return the log-likelihood */
321 real *qtl_pos, *qtl_weight;
322 real *mu, *sigma_sq; /* just ptrs to single reals */
323 DATA *data;
324 real **expected_genotype, **S_matrix;  /* both side-effected */
325 GENO_PROBS *expected_recs;             /* side-effected */
326 {
327     int *poss_genotype;
328     real *genotype_contribution;
329     real sum_exp_genotype, prediction;
330     real like, indiv_like, int_like, sum_int_like, total_log_like;
331     int i, j, n, k;   /* use i=individuals; j=intervals; n=genotype_vars */
332     int x, y, z, geno, qtl, last, c, d, entry;
333 
334     /* externs side-effected: indiv_S_matrix, indiv_rec_like, trans_prob,
335        and rec_like */
336 
337 if (debug_qctm) { /* DEBUGGING CODE */
338     sf(ps,"qtl_pos: %lf\n",qtl_pos[0]); pr();
339     sf(ps,"qtl_weight: %lf\n",qtl_weight[0]); pr();
340     sf(ps,"mu: %lf\n",*mu); pr();
341     sf(ps,"sigma_sq: %lf\n",*sigma_sq); pr();
342 }
343 
344     /*** Init expected_genotype, expected_recs, S_matrix and rec_probs ***/
345     for (j=0; j<n_intervals; j++)
346       for_interval_genotypes(raw.data_type,geno)
347 	for_locus_genotypes(raw.data_type,qtl)
348 	  expected_recs[j][geno][qtl]= 0.0;
349     for (n=0; n<n_genotype_vars; n++) {
350 	for (k=0; k<n_genotype_vars; k++) S_matrix[n][k]= 0.0;
351 	for (i=0; i<n_individuals; i++)	  expected_genotype[i][n]= 0.0;
352     }
353     make_rec_probs(qtl_pos,data->interval_len,trans_prob);
354 
355     /*** Now compute expected genotypes, recs, S_matrix and likelihood ***/
356     total_log_like = 0.0;
357     for (i=0; i<n_individuals; i++) {
358 
359 if (debug_qctm) {
360     sf(ps,"===== indiv %d \n",i); print(ps);
361     for (j=0; j<n_intervals; j++) {
362 	sf(ps,"interval %d probs:\n",j); pr(); z=0;
363 	for_interval_genotypes(raw.data_type,y) {
364 	    sf(ps,"%2d %lf   ",y,data->genotype_prob[i][j][y]); pr(); z++;
365 	    if (z%4==0 && y!=0) nl();
366 	}
367 	if (z%4!=0) nl();
368     }
369 }
370 
371 	/*** For each indiv we calculate indiv_like, indiv_rec_like and
372 	     indiv_S_matrix. These are then used in the running computation of
373 	     expected_genotype, S_matrix, expected_recs and the log like.
374 	     First, we initialize the indiv... variables. ***/
375 
376 	indiv_like=0.0; sum_int_like=0.0;
377 	for (j=0; j<n_intervals; j++)
378 	  for_interval_genotypes(raw.data_type,geno)
379 	    for_locus_genotypes(raw.data_type,qtl)
380 	      indiv_rec_like[j][geno][qtl]= 0.0;
381 	for (n=0; n<n_genotype_vars; n++)
382 	    for (k=0; k<=n; k++) indiv_S_matrix[n][k]= 0.0;
383 
384 	/*** Now we loop over all possible qtl-genotypes, getting a like and
385 	     the rec_like[][][] for each. These are mashed into indiv_like,
386 	     indiv_rec_like and indiv_S_matrix. ***/
387 
388 	for (x=0; x<n_qtl_genotypes; x++) {
389 	    poss_genotype= lookup_genotype[x];
390 	    genotype_contribution= lookup_coded_genotype[x];
391 
392 	    /* this sets like, int_like, and rec_like */
393 	    likelihood(poss_genotype,genotype_contribution,trans_prob,
394 		      qtl_weight,mu,sigma_sq,data,i,&like,&int_like,rec_like);
395 
396 	    indiv_like+=like;
397 	    sum_int_like+=int_like;
398 	    for (j=0; j<n_intervals; j++)
399 	      for (y=0; y<data->num_genotypes[i][j]; y++) {
400 		  geno= data->genotype[i][j][y];
401 		  indiv_rec_like[j][geno][poss_genotype[j]]+=rec_like[j][geno];
402 	      }
403 	    for (n=0; n<n_genotype_vars; n++) {
404 		expected_genotype[i][n]+= genotype_contribution[n] * like;
405 		for (k=0; k<=n; k++) indiv_S_matrix[n][k]+=
406 		  genotype_contribution[n] * genotype_contribution[k] * like;
407 	    }
408 	} /* end of for possible_genotypes (x<n_qtl_genotypes) */
409 
410 	/* For regression on cont-vars alone, n_qtl_genotypes==0 and thus
411 	   likelihood() never gets called. Thus, we need to calculate the
412 	   indiv_like here... */
413 
414 	/*** For this indiv, we now normalize the indiv_rec_like and
415 	     expected_genotype, then we use this to calculate expected_recs,
416 	     S_matrix, and total_log_like. ***/
417 
418 	if (n_genotype_vars>0) {
419 	    if (fabs(sum_int_like-1.0)>0.01)
420 	      print("*** warning: sum_int_like in E_step is not 1.0\n");
421 	    if (indiv_like > VERY_SMALL) {
422 		total_log_like+= log10(indiv_like);
423 
424 		for (j=0; j<n_intervals; j++)
425 		  for_interval_genotypes(raw.data_type,geno)
426 		    for_locus_genotypes(raw.data_type,qtl)
427 		      indiv_rec_like[j][geno][qtl]/= indiv_like;
428 		for (n=0; n<n_genotype_vars; n++) {
429 		    expected_genotype[i][n]/= indiv_like;
430 		    for (k=0; k<=n; k++)
431 		      S_matrix[n][k]+= indiv_S_matrix[n][k]/indiv_like;
432 		}
433 		for (j=0; j<n_intervals; j++)
434 		  for_interval_genotypes(raw.data_type,geno)
435 		    for_locus_genotypes(raw.data_type,qtl)
436 		      expected_recs[j][geno][qtl]+=
437 			indiv_rec_like[j][geno][qtl];
438 	    } /* end of if (indiv_like > VERY_SMALL) */
439 
440 	} else {  /* n_genotype_vars==0 */
441 	    for (prediction= *mu, c=0; c<n_continuous_vars; c++)
442 		prediction+= data->cont_var[c][i] * qtl_weight[c];
443 	    indiv_like=
444 	      normal_density((data->phenotype[i]-prediction),sigma_sq);
445 	    if (indiv_like > VERY_SMALL) total_log_like+=log10(indiv_like);
446 	}
447 
448 if (debug_qctm) { /* DEBUGGING CODE */
449     int g, q;
450     print("expected_geno: "); for (n=0; n<n_genotype_vars; n++)
451       { sf(ps,"%lf  ",expected_genotype[i][n]); print(ps); } nl();
452     sf(ps,"indiv_like:    %lf   indiv_rec_like:\n",indiv_like); print(ps);
453     for (j=0; j<n_intervals; j++) {
454 	for_locus_genotypes(raw.data_type,q) {
455 	    for_interval_genotypes(raw.data_type,g)
456 	      { sf(ps,"[%d][%d]  ",g,q); pr(); } nl();
457 	    for_interval_genotypes(raw.data_type,g)
458 	      { sf(ps,"%7.5lf ",indiv_rec_like[j][g][q]); pr(); } nl();
459 	}
460     }
461 }
462 
463     } /* end of for i (individuals) */
464 
465 
466     /*** Finally, we fill in the upper right triangle of the S_matrix and
467          the constant entries of the S_matrix and the expected_genotypes.
468 	 These entries correspond to the continuous variables and the base
469 	 trait value (mu) in the regression equation, and are used by
470 	 ML_qtl_weight(), but not ML_qtl_pos(). ***/
471 
472     for (n=0; n<n_genotype_vars; n++) /* fill in upper right */
473       for (k=0; k<n; k++) S_matrix[k][n]= S_matrix[n][k];
474     last= n_genotype_vars + n_continuous_vars;
475 
476     for (i=0; i<n_individuals; i++) expected_genotype[i][last]= 1.0;
477 
478     for (c=0; c<n_continuous_vars; c++) {
479 	entry=n_genotype_vars+c;
480 	for (i=0; i<n_individuals; i++)
481 	  expected_genotype[i][entry]= data->cont_var[c][i];
482 
483 	for (n=0; n<n_genotype_vars; n++) {
484 	    for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
485 	      sum_exp_genotype+=data->cont_var[c][i] * expected_genotype[i][n];
486 	    S_matrix[n][entry]=S_matrix[entry][n]= sum_exp_genotype;
487 	}
488 
489 	for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
490 	  sum_exp_genotype+=data->cont_var[c][i];
491 	S_matrix[entry][last]=S_matrix[last][entry]= sum_exp_genotype;
492 
493 	x=n_genotype_vars+c;
494 	for (d=0; d<=c; d++) {
495 	    y=n_genotype_vars+d;
496 	    for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
497 	      sum_exp_genotype+=data->cont_var[c][i] * data->cont_var[d][i];
498 	    S_matrix[x][y]= S_matrix[y][x]= sum_exp_genotype;
499 	}
500     }
501 
502     for (n=0; n<last; n++) { /* for all genotype_vars and continuous_vars */
503 	for (sum_exp_genotype=0.0, i=0; i<n_individuals; i++)
504 	  sum_exp_genotype+= expected_genotype[i][n];  /* sum for all indivs */
505 	S_matrix[last][n]= S_matrix[n][last]= sum_exp_genotype;  /* mu entry */
506     }
507     S_matrix[last][last]= (real)(n_individuals);
508 
509     return(total_log_like);
510 }
511 
512 
likelihood(qtl_genotype,contribution,trans_prob,qtl_weight,mu,sigma_sq,data,indiv,total_like,total_int_like,rec_like)513 void likelihood(qtl_genotype,contribution,trans_prob,qtl_weight,mu,sigma_sq,
514     data,indiv,total_like,total_int_like,rec_like)
515 int *qtl_genotype;
516 real *contribution;
517 GENO_PROBS *trans_prob;
518 real *qtl_weight;
519 real *mu, *sigma_sq;  	  /* mu and sigma_sq are pointers to single numbers */
520 DATA *data;
521 int indiv;
522 real *total_like;	  /* side-effected - is a  ptr to a single num */
523 real *total_int_like;	  /* side-effected - is also a  ptr to a single num */
524 INTERVAL_GENOTYPE_PROBS *rec_like; /* [interval][int-geno] - side-effected */
525 {
526     real normal_like, prediction, ratio;
527     int j, y, n, c, geno;
528 
529     /* Some elements of the data struct we use in here... */
530     INTERVAL_GENOTYPE_PROBS *indiv_prob= data->genotype_prob[indiv];
531     int *num_int_genos= data->num_genotypes[indiv];
532     INT_POSS_GENOTYPES *poss_geno= data->genotype[indiv];
533         /* poss_geno[interval#][poss-geno#]=> genotype code */
534 
535     /* int_like is used as a temp in here */
536 
537     *total_int_like= 1.0;
538     for (j=0; j<n_intervals; j++) {
539 	int_like[j]= 0.0;
540      /*	for_interval_genotypes(data_type,geno) { EFFECTIVELY */
541  	for (y=0; y<num_int_genos[j]; y++) {
542 	    geno= poss_geno[j][y];
543 	    int_like[j]+= rec_like[j][geno]=
544 	      trans_prob[j][geno][qtl_genotype[j]] * indiv_prob[j][geno];
545 	}
546 	*total_int_like *= int_like[j];
547     }
548 
549     for (n=0, prediction= *mu; n<n_genotype_vars; n++)
550       prediction+= contribution[n] * qtl_weight[n];
551     for (c=0; c<n_continuous_vars; c++) /* ADDED for cont vars */
552       prediction+= data->cont_var[c][indiv] * qtl_weight[n_genotype_vars+c];
553     normal_like= normal_density((data->phenotype[indiv]-prediction),sigma_sq);
554     *total_like= *total_int_like * normal_like;
555 
556     for (j=0; j<n_intervals; j++) {
557 	ratio= (*total_like>VERY_SMALL ? *total_like/int_like[j] : 0.0);
558  	for (y=0; y<num_int_genos[j]; y++) { /* for_interval_genotypes y */
559 	    geno= poss_geno[j][y];
560 	    rec_like[j][geno] *= ratio;
561 	}
562     }
563 
564 if (debug_qctm) { /* DEBUGGING CODE */
565     int x;
566     x=(raw.data_type==INTERCROSS ? 2:1);
567     for (j=0; j<n_intervals; j++) {
568 	sf(ps,"poss_geno=%d cont=(%3.1lf %3.1lf) int_like=%lf rec_like:\n",
569 	   qtl_genotype[j],contribution[x*j],contribution[x*j+1],int_like[j]);
570 	pr();
571     }
572     sf(ps,"total_int_like=%lf norml=%lf total=%lf\n=====\n",
573        *total_int_like,normal_like,*total_like); pr();
574 }
575 
576 }
577 
578 
make_rec_probs(qtl_pos,interval_rf,trans_prob)579 void make_rec_probs(qtl_pos,interval_rf,trans_prob)
580 real *qtl_pos, *interval_rf;
581 GENO_PROBS *trans_prob;
582 {
583     int j, geno, qtl, left, right;
584     real left_rf, right_rf, sum;
585 
586     for (j=0; j<n_intervals; j++) {
587 	left_rf=
588 	  rmaxf(MIN_REC_FRAC,min(qtl_pos[j],MAX_FRAC_OF_RF*interval_rf[j]));
589 	right_rf= (interval_rf[j] - left_rf)/(1 - 2*left_rf);
590 	for_interval_genotypes(raw.data_type,geno) {
591 	    sum= 0.0;
592 	    left= left_genotype[geno];
593 	    right= right_genotype[geno];
594 	    for_locus_genotypes(raw.data_type,qtl) {
595 		sum+= trans_prob[j][geno][qtl]=
596 		  (transition_prob(raw.data_type,left,qtl,left_rf) *
597 		   transition_prob(raw.data_type,qtl,right,right_rf)) /
598 		 transition_prob(raw.data_type,left,right,interval_rf[j]);
599 	    }
600 	    if (fabs(sum-1.0)>0.01)
601 	      print("*** warning: the sum of the trans_probs is not 1.0\n");
602 	}
603     }
604 
605 if (debug_qctm) {
606     for (j=0; j<n_intervals; j++) {
607 	sf(ps,"rec_probs for interval %d rf=%lf\n",j,interval_rf[j]); pr();
608 	for_interval_genotypes(raw.data_type,geno) {
609 	    sf(ps,"%2d ",geno); pr();
610 	    for_locus_genotypes(raw.data_type,qtl)
611 	      { sf(ps,"[%d] %lf ",qtl,trans_prob[j][geno][qtl]); pr(); } nl();
612 	}
613     }
614 }
615 
616 }
617 
618 
normal_density(x,sigma_sq)619 real normal_density(x,sigma_sq)
620 real x,*sigma_sq;
621 {
622     real exponent;
623 
624     if (*sigma_sq < TINY_VARIANCE) {
625 	print("warning: sigma_sq exceeds tiny_variance...\n");
626 	*sigma_sq= TINY_VARIANCE;
627     }
628     exponent = ((x*x)/(2.0 * *sigma_sq))
629              + (0.5 * (log(*sigma_sq))) + log(SQRT_2_PI);
630 
631     if (exponent < BIGGEST_EXPONENT) return(exp(-exponent));
632     else return(TINY_LIKELIHOOD);
633 }
634 
635 
636 
637 /******************** ML_QTL_WEIGHT ********************/
638 
ML_qtl_weight(S_matrix,expected_genotype,phenotype,fix_weight,mu,qtl_weight,sigma_sq)639 void ML_qtl_weight(S_matrix,expected_genotype,phenotype,fix_weight,
640 	mu,qtl_weight,sigma_sq)
641 real **S_matrix, **expected_genotype, *phenotype, *fix_weight;
642 real *mu, *qtl_weight, *sigma_sq;  /* side effect these three */
643 {
644     int n, size, i, j;
645     real total, prediction;
646     /* S_inverse and row are side-effected */
647 
648     size= n_genotype_vars+n_continuous_vars+1;
649 
650 
651     if (!fix_weight_kludge) {
652 	for (n=0; n<size; n++)
653 	  if (fix_weight[n]!=DONT_FIX) kill_entry(S_matrix,size,n);
654 
655 	mat_invert(S_matrix,size,S_inverse);
656 	array_times_matrix(phenotype,expected_genotype,n_individuals,size,
657 			   temp_row);
658 	array_times_matrix(temp_row,S_inverse,size,size,qtl_weight);
659 
660 	for (n=0; n<size; n++)
661 	  if (fix_weight[n]!=DONT_FIX) qtl_weight[n]=fix_weight[n];
662 	*mu= qtl_weight[size-1]; /* last entry is mu */
663 
664     } else { /* if fix_weight_kludge */
665 	for (n=0; n<size-1; n++) qtl_weight[n]= fix_weight[n];
666 	for (i=0, total=0.0; i<n_individuals; i++) {
667 	    for (n=0, prediction= 0.0; n<size-1; n++)
668 	      prediction+= qtl_weight[n] * expected_genotype[i][n];
669 	    total+= phenotype[i] - prediction;
670 	}
671 	*mu= total/((real)n_individuals);
672     }
673 
674     *sigma_sq=
675       variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size);
676 }
677 
678 
kill_entry(S_matrix,size,i)679 void kill_entry(S_matrix,size,i)
680 real **S_matrix;
681 int size, i; /* index of entry to kill */
682 {
683     int n;
684 
685     for (n=0; n<size; n++) S_matrix[i][n]= S_matrix[n][i]= 0.0;
686     S_matrix[i][i]= 1.0;
687 }
688 
689 
variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size)690 real variance(phenotype,qtl_weight,expected_genotype,S_matrix,mu,size)
691 real *phenotype, *qtl_weight, **expected_genotype, **S_matrix, *mu;
692 int size;
693 {
694     int i, n, m;
695     real var, prediction, term1, term2, pheno;
696 
697     term1= term2= 0.0;
698 
699     for (i=0; i<n_individuals; i++) {
700 	for (n=0, prediction= 0.0; n<size-1; n++)
701 	  prediction+= qtl_weight[n] * expected_genotype[i][n];
702 	pheno = phenotype[i] - *mu;
703 	term1+= pheno*pheno - 2.0*pheno*prediction;
704     }
705 
706     for (n=0; n<size-1; n++) for (m=0; m<size-1; m++)
707       term2+= qtl_weight[n] * qtl_weight[m] * S_matrix[n][m];
708 
709     var = (term1 + term2) / ((real)n_individuals);
710     return(var);
711 }
712 
713 
714 /******************** ML_QTL_POS ********************/
715 
716 #define DIVS 100
717 #define STEP 0.02
718 
ML_qtl_pos(expected_recs,interval_rf,fix_pos,qtl_pos)719 void ML_qtl_pos(expected_recs,interval_rf,fix_pos,qtl_pos)
720 GENO_PROBS *expected_recs;
721 real *interval_rf;
722 real *fix_pos; /* []==DONT_FIX if we don't want to fix it */
723 real *qtl_pos; /* side-effected */
724 {
725     int i;
726     real start, end, interval_cm, pos, max_cm, min_cm;
727 
728     for (i=0; i<n_intervals; i++) {
729 
730 	if (fix_pos[i]!=DONT_FIX) qtl_pos[i]=fix_pos[i];
731 
732 	else if (brute_force) {
733 	/***** Brute force computation to find a maximum ******/
734 
735 	    /* First pass */
736 	    interval_cm= haldane_cm(interval_rf[i]);
737 	    min_cm= start= MIN_CM;
738 	    max_cm= end= haldane_cm(MAX_FRAC_OF_RF*interval_rf[i]);
739 	    pos= do_brute_force(expected_recs,i,interval_rf[i],start,end,DIVS);
740 
741 	    /* Second pass */
742 	    start= rmaxf(pos-STEP*interval_cm,min_cm);
743 	    end= rminf(pos+STEP*interval_cm,max_cm);
744 	    pos= do_brute_force(expected_recs,i,interval_rf[i],start,end,DIVS);
745 
746 	    qtl_pos[i]= unhaldane_cm(pos);
747 	} else /* !brute_force */ send(CRASH);
748     } /* for i (all intervals) */
749 }
750 
751 
do_brute_force(expected_recs,i,interval_rf,start,end,steps)752 real do_brute_force(expected_recs,i,interval_rf,start,end,steps)
753   /* return the best pos in cM */
754 GENO_PROBS *expected_recs;
755 int i; /* interval# */
756 real interval_rf, start, end; /* start and end are positions in cM */
757 int steps;
758 {
759     real inc, pos, like, max_like, best_pos;
760     int j;
761 
762     inc=(end-start)/((real)(steps-1)); max_like= -VERY_BIG;
763     for (j=0, pos=start; j<steps; j++, pos+=inc) {
764 	like= pos_like(unhaldane_cm(pos),interval_rf,expected_recs,i);
765 	if (like>max_like) { max_like=like; best_pos=pos; }
766     }
767     return(best_pos);
768 }
769 
770 
pos_like(left_theta,interval_theta,expected_recs,i)771 real pos_like(left_theta,interval_theta,expected_recs,i)
772 GENO_PROBS *expected_recs;
773 real left_theta, interval_theta;
774 int i; /* the interval# */
775 {
776     real event_like, pos_like, right_theta;
777     int interval_geno, left_geno, right_geno, qtl_geno;
778 
779     right_theta= (interval_theta-left_theta)/(1.0-2.0*left_theta);
780     pos_like=0.0;
781 
782     for_interval_genotypes(raw.data_type,interval_geno) {
783 	left_geno= left_genotype[interval_geno];
784 	right_geno= right_genotype[interval_geno];
785 	for_locus_genotypes(raw.data_type,qtl_geno) {
786 	    event_like= apriori_prob(raw.data_type,left_geno) *
787 	      transition_prob(raw.data_type,left_geno,qtl_geno,left_theta) *
788 	      transition_prob(raw.data_type,qtl_geno,right_geno,right_theta);
789 	    pos_like+=
790 	      log(event_like) * expected_recs[i][interval_geno][qtl_geno];
791 	}
792     }
793     return(pos_like);
794 }
795 
796 
797 /*    real left_recs, right_recs, left_norecs, right_norecs;
798     left_recs= left_norecs= right_recs= right_norecs= 0.0;
799     if (raw.data_type!=BACKCROSS) send(CRASH);
800 	    if (left_geno==qtl_geno)
801 	       left_norecs+= expected_recs[i][interval_geno][qtl_geno];
802 	    else left_recs+= expected_recs[i][interval_geno][qtl_geno];
803 	    if (right_geno==qtl_geno)
804 	       right_norecs+= expected_recs[i][interval_geno][qtl_geno];
805 	    else right_recs+= expected_recs[i][interval_geno][qtl_geno];
806     like= left_recs*log(left_theta)   + left_norecs*log(1.0-left_theta) +
807         + right_recs*log(right_theta) + right_norecs*log(1.0-right_theta); */
808 
809 #ifdef VERY_OLD_CODE
810 
811 	    start= 0.0;
812 	    inc= (haldane_cm(interval_rf[i])/(DIVS-1));
813 	    if (print_brute_force) {
814 		sf(ps,PrBF1,i,interval_rf[i],qtl_pos[i]);
815 		print(ps);
816 	    }
817 	    max_pos=do_brute_force(start,inc,interval_rf[i],
818 				   expected_recs,i,print_brute_force,&unimodal);
819 
820 	    if (!unimodal) {
821 		print("*** warning: pos_likes is not unimodal...\n");
822 		qtl_pos[i]= max_pos;
823 	    } else { /* unimodal */
824 		/* Do it again, with interval= max_pos+/-inc */
825 		start= haldane_cm(max_pos)-inc;  /* rrange takes care */
826 		inc= (2*inc)/(DIVS-1);        /* of fencepost error */
827 		/* if (print_brute_force) { print("second pass:\n"); } */
828 		qtl_pos[i]=do_brute_force(start,inc,interval_rf[i],
829 					  expected_recs,i,FALSE,&unimodal);
830 	    }
831 
832 	    if (print_iter && !print_brute_force) {
833 		sf(ps,PrI,i,interval_rf[i],
834 		   guess_pos(expected_recs,i,interval_rf[i]),
835 		   qtl_pos[i]); print(ps);
836 	    }
837 
do_brute_force(start,inc,theta,expected_recs,interval,do_print,unimodal)838 real do_brute_force(start,inc,theta,expected_recs,interval,do_print,unimodal)
839 real start, inc, theta;
840 GENO_PROBS *expected_recs;
841 int interval, do_print, *unimodal;
842 {
843 	real pos, cm, max_pos, max_like, d, d2, f, prev_f;
844 	int j, max_j, dip, got_f;
845 
846 	max_like= VERY_UNLIKELY; dip= FALSE; *unimodal=TRUE;
847 	for (j=0, cm=start; j<DIVS; j++,cm+=inc) {
848 	    pos=unhaldane_cm(cm);
849 	    if (!rrange(&pos, MIN_REC_FRAC, MAX_FRAC_OF_RF*theta)) continue;
850 	    pos_likes(pos,expected_recs,interval,theta,&d,&d2,&f);
851 	    if (do_print) {
852 		sf(ps,PrBF2,pos,f,dip);print(ps);
853 	    }
854 	    if (f>max_like) {
855 		max_like=f; max_pos=pos; max_j=j;
856 		if (dip) *unimodal= FALSE;
857 	    } else if (f<max_like) dip= TRUE;
858 	}
859 	if (do_print) {
860 	    sf(ps,PrBF3,max_pos,max_like);
861 	    print(ps);
862         }
863 	if (max_like==VERY_UNLIKELY) { send(CRASH); }
864 	return(max_pos);
865 }
866 
867 
pos_likes(lambda,expected_recs,i,theta,deriv,deriv2,f)868 void pos_likes(lambda,expected_recs,i,theta,deriv,deriv2,f)
869 real lambda;
870 GENO_PROBS *expected_recs;
871 int i;
872 real theta;
873 real *deriv, *deriv2, *f;
874 {
875 	real w1,x1,y1,z1,w2,x2,y2,z2;
876 	real lambda_star, one_minus_lambda, one_minus_2_lambda;
877 	real sq_one_minus_2_lambda, theta_minus_lambda;
878 	real one_minus_theta_lambda;
879 	real nr1, nr2, r_1, r_2;
880 
881 	rrange(&lambda,0.001,0.499);
882 
883 	r_1= expected_recs[i][REC][REC]   + expected_recs[i][REC][NOREC];
884 	r_2= expected_recs[i][REC][REC]   + expected_recs[i][NOREC][REC];
885 	nr1= expected_recs[i][NOREC][REC] + expected_recs[i][NOREC][NOREC];
886 	nr2= expected_recs[i][REC][NOREC] + expected_recs[i][NOREC][NOREC];
887 
888 	lambda_star= (theta-lambda)/(1.0-2.0*lambda);
889 	*f= r_1*log(lambda) + r_2*log(lambda_star) +
890 	    nr2*log(1.0-lambda_star) + nr1*log(1.0-lambda);
891 /*
892 	one_minus_lambda=	1.0 - lambda;
893 	one_minus_2_lambda= 	1.0 - (2.0*lambda);
894 	sq_one_minus_2_lambda= 	sq(one_minus_2_lambda);
895 	theta_minus_lambda= 	theta - lambda;
896 	one_minus_theta_lambda= 1.0 - theta - lambda;
897 
898 	w1= r_1 / lambda;
899 	x1= r_2 * (2.0/one_minus_2_lambda - 1.0/theta_minus_lambda);
900 	y1= nr2 * (-(2.0*lambda_star-1.0) / one_minus_theta_lambda);
901 	z1= nr1 * (-1.0 / one_minus_lambda);
902 
903 	w2= -r_1 / sq(lambda);
904 	x2=  r_2 * (4.0/sq_one_minus_2_lambda - 1.0/sq(theta_minus_lambda));
905 	y2= -nr2 * ((-2.0/sq_one_minus_2_lambda) +
906 		    (4.0*theta - 2.0)/
907 			(sq(one_minus_theta_lambda) * one_minus_2_lambda));
908 	z2=  nr1 * (-1.0/sq(one_minus_lambda));
909 
910 	*deriv= w1 + x1 + y1 + z1;
911 	*deriv2= w2 + x2 + y2 + z2;
912 */
913 }
914 
guess_pos(expected_recs,i,theta_interval)915 real guess_pos(expected_recs,i,theta_interval)
916 GENO_PROBS *expected_recs;
917 int i;
918 real theta_interval;
919 {
920 	real total_recs, left_recs, right_recs, L, R, X;
921 	real left_cm, right_cm, cm_pos, pos;
922 	int left, right;
923 
924 	total_recs= left_recs= right_recs= 0.0;
925 	{
926 	    L= ((real)left); R= ((real)right);
927 	    X= expected_recs[i][left][right];
928 	    left_recs+=  L * X;
929 	    right_recs+= R * X;
930 	    total_recs+= (L+R) * X;
931 	}
932 
933 	left_cm=  haldane_cm(rminf(left_recs/total_recs, 0.49));
934 	right_cm= haldane_cm(rminf(right_recs/total_recs,0.49));
935         cm_pos =  haldane_cm(theta_interval) * (left_cm /(left_cm +right_cm));
936 	pos = unhaldane_cm(cm_pos);
937 	return(pos);
938 }
939 
940 #endif
941 
qtl_noconv_to_map(data,map)942 void qtl_noconv_to_map(data,map) /* OBSOLETE, I THINK */
943 DATA *data;
944 QTL_MAP *map; 	/* many parts of this are side-effected */
945 {
946     real old_like, new_like;
947     int i;
948     bool done;
949     /* use externs null_qtl_weight, qctm_qtl_weight, qctm_qtl_pos */
950 
951     set_qctm_globals(data,map);
952     map->chi_sq= map->var_explained= map->log_like= 0.0;
953 
954     /*** COMPUTE THE NULL QTL MAPS ***/
955     map->null_log_like=
956       E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq,
957 	     data,expected_genotype,S_matrix,expected_recs);
958     map->no_data_like= 0.0;
959 
960     /*** COMPUTE THE ACTUAL QTL MAP ***/
961     /* qctm_qtl_weight[] was initialized from the map->qtl_weight[] and
962        map->qtl_dominance[] entries by set_qctm_globals() */
963     new_like=
964       E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data,
965 	     expected_genotype,S_matrix,expected_recs);
966     fill_in_qtl_map(map,new_like,qctm_qtl_weight);
967 }
968 
969 
970 
971 #ifdef DEBUGGING_CODE
972     printf("Expected Genotype:\n");
973     for (i=0; i<n_individuals; i++) {
974 	printf("%d: ",i);
975 	for (j=0; j<size; j++) printf("%8.5lf ",expected_genotype[i][j]);
976 	printf("\n");
977     }
978     printf("\nS Matrix:\n");
979     for (i=0; i<size; i++) {
980 	printf("%d: ",i);
981 	for (j=0; j<size; j++) printf("%8.5lf ",S_matrix[i][j]);
982 	printf("\n");
983     }
984 #endif
985 
986