1 /******************************************************************************
2
3 #### ##### ## # # ####
4 # # # # # # # # # #
5 # # # # # # # # #
6 # # # ##### ###### # ## # ### #
7 # # # # # # ## ## ### # #
8 ### # # # # # # # ### ####
9
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12 for Biomedical Research. All rights reserved. See READ.ME for license. */
13
14 /* QTL raw data */
15
16 #define INC_LIB
17 #define INC_SHELL
18 #define INC_QLOWLEVEL
19 #define INC_CALLQCTM
20 #define INC_QTOPLEVEL
21 #include "qtl.h"
22
23 /* External statics */
24 RAW raw;
25 int **make_interval_genotype;
26 int *left_genotype, *right_genotype;
27 real *map_space;
28 int *order;
29 int segregation_distortion;
30 bool altered_chroms;
31 bool already_printed;
32
33 /* Internal interfaces */
34 char name_chars[70];
35 int nam_len;
36 #define LEFT 0
37 #define RIGHT 1
38 #define F2VERSION 3
39
40 void getdataln();
41 void read_map_locus();
42 void read_quant_trait();
43 real interval_likelihood();
44 real pheno_given_geno();
45
46 void make_count_recs_etc();
47
48 bool missing_data(); /* OBSOLETE, NOT USED */
49 void initial_prob();
50 void condition();
51 void altered_chroms_message();
52
53 #define MAXEXCEED "max # indiv or inter exceeded"
54
55 #define FILE_MISMATCH "\
56 The '.data' and '.trait' files were not prepared from the same raw\n\
57 data file. You will need to re-prepare your data in MAPMAKER."
58
59 #define OLD_FILENUM "\
60 This file was prepared using an older version of MAPMAKER. You will\n\
61 need to re-prepare it using the 'prepare data' command in MAPMAKER."
62
63 /********** PRELIMINARIES **********/
64
raw_init()65 void raw_init()
66 {
67 raw.n_loci= 0; raw.n_traits= 0;
68 raw.trait= NULL; raw.trait_name= NULL;
69 raw.locus= NULL; raw.locus_name= NULL;
70 raw.map_dist= NULL; raw.data_type= NO_DATA; raw.f3=FALSE;
71 raw.left_cond_prob= NULL; raw.right_cond_prob= NULL;
72 raw.file[0]='\0'; raw.trait_eqn= NULL;
73 strcpy(default_backcross_chars, "ABH-");
74 strcpy(default_intercross_chars,"ABCDH-");
75 altered_chroms = FALSE;
76 already_printed = FALSE;
77 make_count_recs_etc();
78 }
79
make_count_recs_etc()80 void make_count_recs_etc()
81 {
82 int **x, *y;
83
84 matrix(make_interval_genotype,MAX_LOCUS_GENOTYPES,MAX_LOCUS_GENOTYPES,int);
85 array(left_genotype,MAX_INTERVAL_GENOTYPES,int);
86 array(right_genotype,MAX_INTERVAL_GENOTYPES,int);
87
88 x=make_interval_genotype;
89 x[A][A]=AA; x[A][H]=AH; x[A][B]=AB;
90 x[H][A]=HA; x[H][H]=HH; x[H][B]=HB;
91 x[B][A]=BA; x[B][H]=BH; x[B][B]=BB;
92
93 y=left_genotype;
94 y[AA]= y[AB]= y[AH]= A;
95 y[HA]= y[HB]= y[HH]= H;
96 y[BA]= y[BB]= y[BH]= B;
97
98 y=right_genotype;
99 y[AA]= y[BA]= y[HA]= A;
100 y[AH]= y[BH]= y[HH]= H;
101 y[AB]= y[BB]= y[HB]= B;
102 }
103
104
free_raw()105 void free_raw() /* alloced by read_data - NEVER BEEN TESTED!! */
106 {
107 unmatrix(raw.locus, raw.n_indivs, char);
108 unmatrix(raw.locus_name, raw.n_loci, char);
109 unmatrix(raw.trait, raw.n_indivs, real);
110 unmatrix(raw.trait_name, raw.max_traits, char);
111 unmatrix(raw.trait_eqn, raw.max_traits, char);
112 unmatrix(raw.left_cond_prob, raw.n_indivs, LOCUS_GENOTYPE_PROBS);
113 unmatrix(raw.right_cond_prob, raw.n_indivs, LOCUS_GENOTYPE_PROBS);
114 unarray(raw.map_dist, real);
115 raw.n_loci= raw.n_traits= raw.n_indivs= raw.name_len= 0;
116 raw.file[0]='\0';
117 }
118
data_loaded()119 int data_loaded() { return(raw.file[0]!='\0'); }
120
121
122
123 /********** THE DATA READER **********/
124
getdataln(fp)125 void getdataln(fp) /* get next nonblank/noncomment data file line */
126 FILE *fp;
127 { do { fgetln_(fp); BADDATA_line_num++; } while(nullstr(ln) || ln[0]=='#');
128 BADDATA_ln= ln; }
129 real read_map_distance();
130 void read_map_locus();
131 void read_quant_trait();
132
133 #define DUMMY_LOCI 1
134
read_data(fpa,fpb,fpm,temp,number_of_file)135 void read_data(fpa,fpb,fpm,temp,number_of_file)
136 int number_of_file;
137 FILE *fpa, *fpb, *fpm;
138 char *temp;
139 {
140
141 int k=0,v,l,i,seq_size,e,hist_size,num_chrom,num_loc=0,t_loc=0,j=0,loc;
142 int mapnum,name_len=80, checker, mapm_loci;
143 int n_indivs, n_loci, n_traits, random_check1, random_check2, num_entries;
144 real ma=0,rf;
145 char *size_of_sequence,number_of_lines[TOKLEN+1],*name,*err;
146 char num_of_chroms[TOKLEN+1],pointer_to_lines[TOKLEN+1];
147 char all_str[5*SEQ_LEN], current_chrom[SEQ_LEN];
148 char default_intercross_chars[10], default_backcross_chars[10];
149
150 name = NULL;
151 run {
152 getdataln(fpa);
153 sscanf(ln,"%*ld %*d %d\n",&mapm_loci);
154 mapm_loci *= 2;
155 if(mapm_loci < 1) error("insufficient genetic data for analysis\n");
156
157 array(map_space,mapm_loci,real);
158 array(order,mapm_loci,int);
159 array(name, NAME_LEN+1, char);
160
161 strcpy(default_backcross_chars, "ABH-");
162 strcpy(default_intercross_chars,"ABCDH-");
163
164 getdataln(fpm);
165 while(nstrcmp(ln, "*Chromosomes:", 13) != 0) getdataln(fpm);
166
167
168 /***** DON'T BOTHER WITH THESE MAPM VARIABLES - JUST USE QTL DEFAULTS *****/
169 /**********
170 while(strcmp(ln, "*Print names: 0") != 0 && strcmp(ln,"*Print names: 1") != 0)
171 getdataln(fpa);
172 sscanf(ln,"%*s %*s %d",&print_names);
173 fscanf(fpa,"%*s %*s %*d\n");
174 fscanf(fpa,"%*s %*s %d\n",&print_maps);
175 fscanf(fpa,"%*s %*s %d\n",&segregation_distortion);
176 fscanf(fpa,"%*s %d\n",&mapnum);
177 fscanf(fpa,"%*s %*s %d\n",&wizard_mode);
178
179 getdataln(fpa);
180 getdataln(fpa);
181 getdataln(fpa);
182 **********/
183
184 stoken(&ln, sREQUIRED, num_of_chroms); /* get rid of "*Chromosomes:" */
185 itoken(&ln, iREQUIRED, &num_chrom);
186 if(num_chrom == 0)
187 error("no linkage maps saved...QTL cannot use this data yet");
188 raw.n_chroms = num_chrom;
189 array(raw.chrom_start,num_chrom,int);
190 array(raw.chrom_n_loci,num_chrom,int);
191 array(raw.original_locus,mapm_loci,int);
192 e=0;
193 /* Now I loop through the chromosomes in order to get all the
194 important loci, and the map distances between them */
195 strcpy(all_str,"");
196 dum_loc=0;
197 for(i=0;i<num_chrom;i++) {
198 current_chrom[0]='\0';
199 getdataln(fpm); /* should be name and sequence line */
200 while(ln[0] != '*') getdataln(fpm); /* just to make sure */
201 stoken(&ln,sREQUIRED,name); /* keep for named sequence */
202 itoken(&ln, iREQUIRED, &num_loc);
203 raw.chrom_n_loci[i] = num_loc;
204 if (!print_mapm_loci) {
205 if (DUMMY_LOCI) sf(ps,"%d-%d",t_loc+2,t_loc+num_loc+1);
206 else sf(ps, "%d-%d",t_loc+1,t_loc+num_loc);
207 strcat(all_str,ps);
208 strcat(all_str," ");
209 }
210 if (DUMMY_LOCI) {
211 dum_loc++;
212 num_loc++;
213 }
214 if (!print_mapm_loci)
215 name_sequence(name,ps,&err); /* set chrom names */
216
217 t_loc += num_loc;
218 getdataln(fpm);
219 if (DUMMY_LOCI) {
220 checker = -1;
221 order[j] = -1;
222 j++;
223 for(v=0;v<num_loc-1;v++) {
224 if (nullstr(ln)) getdataln(fpm);
225 itoken(&ln, iREQUIRED, &loc);
226 if (checker == -1) {
227 raw.chrom_start[i] = loc;
228 checker = 1;
229 }
230 order[j] = loc;
231 if (print_mapm_loci) {
232 sf(ps,"%d ",loc+1);
233 strcat(current_chrom, ps);
234 }
235 j++;
236 }
237 if (print_mapm_loci) {
238 name_sequence(name,current_chrom,&err);
239 strcat(all_str,&name[1]);
240 strcat(all_str," ");
241 }
242 map_space[k] = .499; k++;
243 for (l=0; l<num_loc-2; l++,k++)
244 map_space[k] = read_map_distance(fpm,l);
245 map_space[k] = 0.499; k++;
246 getdataln(fpm);
247 }
248 else {
249 checker = -1;
250 for(v=0;v<num_loc;v++) {
251 if (nullstr(ln)) getdataln(fpm);
252 itoken(&ln, iREQUIRED, &loc);
253 if (checker == -1) {
254 raw.chrom_start[i] = loc;
255 checker = 1;
256 }
257 order[j] = loc;
258 if (print_mapm_loci) {
259 sf(ps,"%d ",loc);
260 strcat(current_chrom, ps);
261 }
262 j++;
263 }
264 if (print_mapm_loci) {
265 name_sequence(name,current_chrom,&err);
266 strcat(all_str,current_chrom);
267 strcat(all_str,current_chrom);
268 }
269 if (DUMMY_LOCI) {
270 map_space[k] = .499; k++;
271 }
272 for (l=0; l<num_loc-1; l++,k++)
273 map_space[k] = read_map_distance(fpm,l);
274 map_space[k] = 0.499; k++;
275 getdataln(fpm);
276 }
277 }
278 if (DUMMY_LOCI) {
279 order[j]= -1; /* Add dummy locus to end */
280 map_space[k] = 0.499; k++; t_loc++;
281 }
282 name_sequence("*all",all_str,&err);
283 frewind(fpa);
284 getdataln(fpa);getdataln(fpa);
285 strcpy(raw.file,temp);
286 raw.n_loci = t_loc;
287 if (!(itoken(&ln,iREQUIRED,&random_check1) && random_check1>0 &&
288 itoken(&ln,iREQUIRED,&n_indivs) && n_indivs>0 &&
289 itoken(&ln,iREQUIRED,&n_loci) && n_loci>0))
290 send(BADDATA);
291 if (random_check1%10 != F2VERSION)
292 error(OLD_FILENUM);
293 raw.n_indivs = n_indivs;
294
295
296 /*if(max_individuals<0) {
297 if (max_individuals>MAX_INDIVIDUALS)
298 { why_(MAXEXCEED); send(BADDATA); }
299 else max_individuals= imaxf(n_indivs+1,0-max_individuals);
300 } else if (max_individuals>0 && n_indivs>max_individuals)
301 { why_(MAXEXCEED); send(BADDATA); }*/
302
303 matrix(raw.locus_name,t_loc,name_len,char);
304 matrix(raw.locus,n_indivs,t_loc,char);
305 array(raw.map_dist,t_loc,real);
306 matrix(raw.left_cond_prob, n_indivs, t_loc,
307 LOCUS_GENOTYPE_PROBS);
308 matrix(raw.right_cond_prob, n_indivs, t_loc,
309 LOCUS_GENOTYPE_PROBS);
310 for(i=0;i<t_loc;i++) raw.map_dist[i] = map_space[i];
311 read_map_locus(fpa,n_indivs,t_loc,order,n_loci);
312
313 /* Now read the traits and QTL state variables from .trait file */
314
315 getdataln(fpb);
316 itoken(&ln, iREQUIRED, &random_check2);
317 if (random_check2 != random_check1)
318 error(FILE_MISMATCH);
319 getdataln(fpb);
320 itoken(&ln, iREQUIRED, &n_traits);
321 raw.filenumber = random_check2;
322 raw.n_traits = n_traits;
323 raw.max_traits= MAX_TRAITS(n_traits);
324 matrix(raw.trait,n_indivs,raw.max_traits,real);
325 matrix(raw.trait_name,raw.max_traits,name_len,char);
326 matrix(raw.trait_eqn,raw.max_traits,TRAIT_EQN_LEN+1,char);
327 for (j=0; j<n_traits; j++)
328 read_quant_trait(fpb,j,n_indivs);
329 getdataln(fpb); /* Skips over line "#QTL only variables" */
330 sscanf(ln,"%*s %*s %*s %d\n",&print_mapm_loci);
331 fscanf(fpb,"%*s %*s %lf\n",&like_tolerance);
332 fscanf(fpb,"%*s %*s %d\n", &brute_force);
333 fscanf(fpb,"%*s %*s %d\n", &max_intervals);
334 fscanf(fpb,"%*s %*s %*s %d\n", &max_continuous_vars);
335 fscanf(fpb,"%*s %*s %d\n",&max_wiggles);
336 fscanf(fpb,"%*s %*s %d\n",&max_compares);
337 fscanf(fpb,"%*s %*s %d\n",&units);
338 fscanf(fpb,"%*s %d\n",&num_chrom);
339
340 /* Loop through the chromosomes, checking to ensure
341 no changes since QTL was last used */
342
343
344 if (num_chrom == 0) {}
345 else {
346 if(num_chrom != raw.n_chroms) {
347 altered_chroms_message();
348 }
349 if (DUMMY_LOCI) k = 1;
350 else k=0;
351 for(i=0;i<num_chrom;i++) {
352 j = 0;
353 getdataln(fpb);
354 sscanf(ln, "%*s %d", &num_loc);
355 if(num_loc != raw.chrom_n_loci[i])
356 altered_chroms_message();
357 getdataln(fpb);
358 checker = -1;
359 for(v=0;v<num_loc;v++) {
360 if (nullstr(ln)) getdataln(fpb);
361 itoken(&ln, iREQUIRED, &loc);
362 if (checker == -1) {
363 if(loc != raw.chrom_start[i])
364 altered_chroms_message();
365 checker = 1;
366 }
367 }
368 for(v=0;v<num_loc-1;v++,k++) {
369 if(nullstr(ln)) getdataln(fpb);
370 rtoken(&ln, rREQUIRED, &rf);
371 if(rf != map_space[k]) {
372 if(!(map_space[k] == .499 && rf >= .49))
373 altered_chroms_message();
374 }
375 }
376 if (DUMMY_LOCI) k++;
377 k++;
378 getdataln(fpb);getdataln(fpb);
379 }
380 }
381
382
383
384 fscanf(fpb,"%*s %*s %*s %d\n",&num_contexts);
385 fscanf(fpb,"%*s %*s %d\n",&active_context);
386 for(i = 0; i < num_contexts; i++) {
387 fscanf(fpb,"%*s %*d\n");
388 fscanf(fpb,"%*s %d\n", &trait);
389 if(!altered_chroms) {
390 fscanf(fpb,"%*s %*s %d\n",&num_entries);
391 load_table(context[i]->named_sequences,
392 fpb,INDEX_BY_NAME,num_entries);
393 fscanf(fpb,"%*s %*s %d\n",&num_entries);
394 load_table(context[i]->sequence_history,
395 fpb,INDEX_BY_NUMBER,num_entries);
396 context[i]->seq_history_num=
397 context[i]->sequence_history->next_entry_num;
398 } else {
399 context[i]->seq_history_num = 0;
400 }
401 }
402 } on_exit {
403 unarray(name, char);
404 relay_messages;
405 } return;
406 }
407
408
save_traitfile(fp)409 void save_traitfile(fp)
410 FILE *fp;
411 {
412 char *name,mapnum;
413 int i,j,loci_tot,map_tot,usenum;
414
415 run {
416 sf(ps,"%d mapmaker trait data\n",raw.filenumber);
417 fpr(fp);
418 sf(ps,"%d\n", raw.n_traits);
419 fpr(fp);
420
421 for(i=0;i<raw.n_traits;i++) {
422 sf(ps,"*%-9s",raw.trait_name[i]);fpr(fp);
423
424 if (!nullstr(raw.trait_eqn[i]))
425 { sf(ps," =%s\n",raw.trait_eqn[i]);fpr(fp); }
426
427 for(j=0;j<raw.n_indivs;j++) {
428 if(j % 5 == 0 && j != 0)
429 fprint(fp,WRS("\n "));
430 sf(ps,"%12.3lf ", raw.trait[j][i]);fpr(fp);
431 }
432 fnl(fp);
433 }
434
435 fprint(fp,WRS("#QTL only variables:\n"));
436 sf(ps,"*Print mapm loci: %d\n", print_mapm_loci);fpr(fp);
437 sf(ps,"*Like tolerance: %lf\n",like_tolerance);fpr(fp);
438 sf(ps,"*Brute force: %d\n",brute_force);fpr(fp);
439 sf(ps,"*Max intervals: %d\n",max_intervals);fpr(fp);
440 sf(ps,"*Max continuous vars: %d\n",max_continuous_vars);fpr(fp);
441 sf(ps,"*Max wiggles: %d\n",max_wiggles);fpr(fp);
442 sf(ps,"*Max compares: %d\n",max_compares);fpr(fp);
443 sf(ps,"*Default units: %d\n",units); fpr(fp);
444 sf(ps,"*Chromosomes: %d\n", raw.n_chroms);
445 fpr(fp);
446 loci_tot = 0;map_tot = 0;
447 for (i=0;i<raw.n_chroms;i++) {
448 if (DUMMY_LOCI) {
449 loci_tot++;
450 map_tot++;
451 }
452 sf(ps, "chr%d %d\n", i+1, raw.chrom_n_loci[i]); fpr(fp);
453 for(j=0;j<raw.chrom_n_loci[i];j++) {
454 if(j % 18 == 0 && j != 0)
455 fprint(fp,WRS("\n"));
456 sf(ps, "%d ",order[loci_tot]);fpr(fp);
457 loci_tot++;
458 }
459 fnl(fp);
460 for(j=0;j<raw.chrom_n_loci[i]-1;j++) {
461 if(j % 12 == 0 && j != 0)
462 fprint(fp,WRS("\n"));
463 sf(ps, "%.4lf ",map_space[map_tot]);fpr(fp);
464 map_tot++;
465 }
466 fnl(fp);
467 map_tot++;
468 fprint(fp,WRS("0\n"));
469 fprint(fp,WRS("0\n"));
470 }
471 sf(ps,"*Number of contexts: %d\n",num_contexts);
472 fpr(fp);
473 sf(ps,"*Active context: %d\n",active_context);
474 fpr(fp);
475 fnl(fp);
476 for(i = 0; i < num_contexts; i++) {
477 sf(ps,"*Context %d\n",i+1); fpr(fp);
478 sf(ps,"*Trait: %d\n",trait); fpr(fp);
479 sf(ps,"*Named Sequences: %d\n",
480 count_table_entries(context[i]->named_sequences));
481 fpr(fp);
482
483 save_table(context[i]->named_sequences,fp,INDEX_BY_NAME);
484 sf(ps,"*Sequence history: %d\n",
485 count_table_entries(context[i]->sequence_history));
486 fpr(fp);
487 save_table(context[i]->sequence_history,fp,INDEX_BY_NUMBER);
488 }
489 } on_exit {
490 relay_messages;
491 }
492 }
493
494
read_map_locus(fp,indivs,t_loc,order,n_loci)495 void read_map_locus(fp,indivs,t_loc,order,n_loci)
496 FILE *fp;
497 int indivs, n_loci, *order;
498 /* could send BADDATA or IOERROR */
499 {
500
501 char **temp_set,c, nam[TOKLEN+1], *namp;
502 int j,k,i,name_len=80,t,num_of_terms = 0;
503 matrix(temp_set,n_loci,80,char);
504 for(i=0;i<t_loc;i++) {
505 if (order[i] == -1) {
506 sf(ps,"ter%d",num_of_terms);
507 num_of_terms++;
508 strcpy(raw.locus_name[i],ps);
509 for(k=0;k<indivs;k++)
510 raw.locus[k][i] = '-';
511 }
512 }
513 for (j=0;j<n_loci;j++) {
514 getdataln(fp);
515 while(ln[0] != '*') { getdataln(fp); }
516 t=j;
517 for(i=0;i<t_loc;i++) {
518 if (t == order[i]) {
519 t = i;
520 raw.original_locus[t] = order[i]+1;
521 self_delimiting = ptr_to("");
522 if (!stoken(&ln, sREQUIRED, nam))
523 { send(BADDATA); }
524 strcpy(raw.locus_name[t],&nam[1]);
525 /* read the locus' data */
526 for (k=0; k<indivs; k++) {
527 if (nullstr(ln)) getdataln(fp);
528 if (!parse_char(&ln,geno_chars,TRUE,&c))
529 { send(BADDATA); }
530 else if (c == 'B' && raw.data_type == BACKCROSS)
531 { raw.locus[k][t]= 'H'; }
532 else
533 { raw.locus[k][t]= c; }
534 }
535 if (!nullstr(ln)) { send(BADDATA); }
536 break;
537 }
538 }
539 }
540 }
541
542
read_map_distance(fp,num)543 real read_map_distance(fp,num)
544 FILE *fp;
545 int num;
546 {
547 int bar;
548 real map_dist;
549 if (nullstr(ln)) getdataln(fp);
550 if (!rtoken(&ln, rREQUIRED, &map_dist))
551
552 /* REMOVED !rrange(&raw.map_dist[num],0.0,0.5)) */
553 { why_("bad map distance"); send(BADDATA); }
554
555 /* THIS WAS A MAJOR KLUDGE - BUT IT IS TEMPORARY! */
556 if (map_dist > 0.5) {
557 map_dist= unhaldane_cm(map_dist);
558 }
559 /* sf(ps,"%-3d %lf\n",num,raw.map_dist[num]); print(ps); */
560 rrange(&map_dist,MIN_REC_FRAC,MAX_REC_FRAC);
561 return(map_dist);
562 }
563
564
read_quant_trait(fp,num,indivs)565 void read_quant_trait(fp,num,indivs)
566 FILE *fp;
567 int num, indivs;
568 /* could send IOERROR or BADDATA */
569 {
570
571 char tok[TOKLEN+1], c;
572 int i;
573
574 getdataln(fp);
575 /* read the locus name */
576 if (!nstoken(&ln, sREQUIRED, tok, NAME_LEN)) { send(BADDATA); }
577 strcpy(raw.trait_name[num],&tok[1]); raw.trait_eqn[num][0]='\0';
578 if (parse_char(&ln,"=",TRUE,&c)) {
579 if (nullstr(ln)) send(BADDATA);
580 nstrcpy(raw.trait_eqn[num],ln,TRAIT_EQN_LEN);
581 ln = NULL;
582 }
583 /* read its data */
584 for (i=0; i<indivs; i++) {
585 if (nullstr(ln)) getdataln(fp);
586 if (!rtoken(&ln,rREQUIRED,&raw.trait[i][num])) {
587 if (!stoken(&ln,sREQUIRED,tok) || strcmp(tok,"-"))
588 { send(BADDATA); }
589 else raw.trait[i][num]= MISSING_PHENO;
590 }
591 }
592 if (!nullstr(ln)) { send(BADDATA); }
593 }
594
595
596 void read_oldmap_locus(), read_oldquant_trait(), read_oldmap_distance();
597
read_olddata(fp,path)598 void read_olddata(fp,path)
599 FILE *fp;
600 char *path;
601 {
602 int i, j, k, n_indivs, n_loci, n_traits, pos;
603 char nam[TOKLEN], c, tok[TOKLEN + 1];
604 real val;
605 extern int nam_len;
606
607 BADDATA_line_num= 0;
608 if (!nullstr(raw.file)) free_raw();
609
610 run {
611 getdataln(fp);
612 if (!(itoken(&ln,iREQUIRED,&n_indivs) && n_indivs>0 &&
613 itoken(&ln,iREQUIRED,&n_loci) && n_loci>0 &&
614 itoken(&ln,iREQUIRED,&n_traits) && n_traits>0 &&
615 itoken(&ln,iREQUIRED,&nam_len) &&
616 nam_len>0 && nam_len<=TOKLEN))
617 { why_("bad header or header entries");
618 send(BADDATA); }
619
620
621 raw.n_loci= n_loci;
622 raw.n_traits= n_traits;
623 raw.n_indivs= n_indivs;
624 raw.name_len= nam_len;
625 raw.left_cond_prob= NULL;
626 raw.right_cond_prob=NULL;
627
628 matrix(raw.locus, n_indivs, n_loci, char);
629 matrix(raw.locus_name, n_loci, nam_len, char);
630 array(raw.map_dist, n_loci, real);
631 raw.max_traits=MAX_TRAITS(raw.n_traits);
632 matrix(raw.trait,n_indivs,raw.max_traits,real);
633 matrix(raw.trait_name,raw.max_traits,nam_len,char);
634 matrix(raw.left_cond_prob, n_indivs, n_loci, LOCUS_GENOTYPE_PROBS);
635 matrix(raw.right_cond_prob, n_indivs, n_loci,LOCUS_GENOTYPE_PROBS);
636
637 for (j=0; j<n_loci; j++)
638 read_oldmap_locus(fp,j,n_indivs);
639 for (j=0; j<n_traits; j++)
640 read_oldquant_trait(fp,j,n_indivs);
641
642 getdataln(fp); stoken(&ln,sREQUIRED,tok);
643 if (!streq(tok,"map:"))
644 { why_("expected 'MAP:'"); send(BADDATA); }
645
646 for (i=0; i<n_loci-1; i++) read_oldmap_distance(fp,i);
647
648 if (!nullstr(ln) || !end_of_text(fp))
649 { why_("data after logical end of file"); send(BADDATA); }
650
651 close_file(fp);
652 nstrcpy(raw.file,path,PATH_LENGTH);
653
654 } when_aborting {
655 free_raw();
656 raw.file[0]='\0';
657 relay;
658 }
659 }
660
661
read_oldmap_locus(fp,num,indivs)662 void read_oldmap_locus(fp,num,indivs)
663 FILE *fp;
664 int num, indivs;
665 /* could send BADDATA or IOERROR */
666 {
667 char c, nam[TOKLEN+1], *namp;
668 int i;
669
670 /* read the locus name */
671 getdataln(fp);
672 if (!field(&ln, nam_len, raw.locus_name[num]))
673 { why_("missing locus name"); send(BADDATA); }
674
675 /* read the locus' data */
676 for (i=0; i<indivs; i++) {
677 if (nullstr(ln)) getdataln(fp);
678 if (!parse_char(&ln,geno_chars,TRUE,&c))
679 { why_("expected a genotype character"); send(BADDATA); }
680 else raw.locus[i][num]= c;
681 }
682 if (!nullstr(ln)) { why_("extra data"); send(BADDATA); }
683 }
684
685
read_oldquant_trait(fp,num,indivs)686 void read_oldquant_trait(fp,num,indivs)
687 FILE *fp;
688 int num, indivs;
689 /* could send IOERROR or BADDATA */
690 {
691 char tok[TOKLEN+1];
692 int i;
693
694 getdataln(fp);
695 /* read the locus name */
696 if (!field(&ln, nam_len, raw.trait_name[num]))
697 { why_("bad locus name field"); send(BADDATA); }
698
699 /* read its data */
700 for (i=0; i<indivs; i++) {
701 if (nullstr(ln)) getdataln(fp);
702 if (!rtoken(&ln,rREQUIRED,&raw.trait[i][num])) {
703 if (!stoken(&ln,sREQUIRED,tok) || strcmp(tok,"-"))
704 { why_("bad trait data"); send(BADDATA); }
705 else raw.trait[i][num]= MISSING_PHENO;
706 }
707 }
708 if (!nullstr(ln)) { why_("extra data"); send(BADDATA); }
709 }
710
read_oldmap_distance(fp,num)711 void read_oldmap_distance(fp,num)
712 FILE *fp;
713 int num;
714 {
715 int bar;
716
717 if (nullstr(ln)) getdataln(fp);
718 if (!rtoken(&ln, rREQUIRED, &raw.map_dist[num]))
719 /* REMOVED !rrange(&raw.map_dist[num],0.0,0.5)) */
720 { why_("bad map distance"); send(BADDATA); }
721
722 /* THIS WAS A MAJOR KLUDGE - BUT IT IS TEMPORARY! */
723 if (raw.map_dist[num] > 0.5) {
724 bar= (int) (raw.map_dist[num] + 0.499);
725 raw.map_dist[num]= unkosambi((real) bar);
726 }
727 /* sf(ps,"%-3d %lf\n",num,raw.map_dist[num]); print(ps); */
728 rrange(&raw.map_dist[num],MIN_REC_FRAC,MAX_REC_FRAC);
729 }
730
731
732
733
734
735
736
737
738 /*** THE NEW AND IMPROVED RAW DATA CRUNCHER - USED AFTER LOADING DATA ****/
739
740 #define PROBS_NOT_EQ \
741 "*** WARNING: left and right probs unequal: indiv %d locus %d genotype %d\n"
742 #define PROBS_NOT_1 \
743 "*** WARNING: interval genotype probs not 1: indiv %d interval %d\n"
744
crunch_data()745 void crunch_data() /* side effects the raw data struct */
746 {
747 int i, k, first, last;
748
749 real r[3], l[3], right, left;
750 INTERVAL_GENOTYPE_PROBS *indiv_probs;
751 int geno, foo;
752
753 first=0; last=raw.n_loci-1;
754 for (i=0; i<raw.n_indivs; i++) {
755
756 /* get left conditioned probs */
757 initial_prob(raw.left_cond_prob[i],first,raw.locus[i][first]);
758 for (k=1; k<=last; k++)
759 condition(raw.left_cond_prob[i],k,k-1,raw.locus[i][k],
760 raw.map_dist[k-1],LEFT);
761
762 /* and now the right conditioned probs */
763 for_locus_genotypes(raw.data_type,geno)
764 raw.right_cond_prob[i][last][geno] =
765 pheno_given_geno(raw.locus[i][last],geno);
766 for (k=last-1; k>=first; k--)
767 condition(raw.right_cond_prob[i],k,k+1,raw.locus[i][k],
768 raw.map_dist[k],RIGHT);
769 }
770
771 /* DEBUGGING STUFF: We now check the L-R- conditioning */
772
773 array(indiv_probs,2,INTERVAL_GENOTYPE_PROBS);
774 for (i=0; i<raw.n_indivs; i++)
775 for (k=first; k<last-1; k++) {
776 r[A]=r[B]=r[H]=l[A]=l[B]=l[H]=left=right=0.0;
777
778 indiv_interval_probs(indiv_probs,0,i,k,k+1,raw.map_dist[k]);
779 for_interval_genotypes(raw.data_type,geno)
780 l[right_genotype[geno]]+= indiv_probs[0][geno];
781
782 indiv_interval_probs(indiv_probs,1,i,k+1,k+2,raw.map_dist[k+1]);
783 for_interval_genotypes(raw.data_type,geno)
784 r[left_genotype[geno]]+= indiv_probs[1][geno];
785
786 for_locus_genotypes(raw.data_type,geno) {
787 if (fabs(l[geno]-r[geno])>=0.01)
788 { sf(ps,PROBS_NOT_EQ,i+1,k+2,geno); pr(); }
789 left+=l[geno]; right+=r[geno];
790 }
791 if (fabs(left-1.0)>=0.01) { sf(ps,PROBS_NOT_1,i+1,k+1); pr(); }
792 if (fabs(right-1.0)>=0.01) { sf(ps,PROBS_NOT_1,i+1,k+2); pr(); }
793
794 /* sf(ps,"Indiv = %d Locus = %d left=%lf right=%lf\n",i+1,k+2,
795 left,right); pr();*/
796 /* for_interval_genotypes(raw.data_type,foo)
797 { sf(ps,"geno %d: left_int_prob=%lf right_int_prob=%lf\n",
798 foo,indiv_probs[0][foo],indiv_probs[1][foo]); pr(); }
799 */
800 /* for_locus_genotypes(raw.data_type,foo)
801 { sf(ps,"geno %d lcp=%lf rcp=%lf\n",foo,
802 raw.left_cond_prob[i][k+1][foo],
803 raw.right_cond_prob[i][k+1][foo]); pr(); }
804 */
805 }
806
807 }
808
809
initial_prob(prob,locus,this_genotype)810 void initial_prob(prob,locus,this_genotype)
811 LOCUS_GENOTYPE_PROBS *prob;
812 int locus;
813 char this_genotype;
814 {
815 int i;
816 prob[locus][A]= prob[locus][B]= prob[locus][H]= 0.0;
817
818 if (raw.data_type==BACKCROSS)
819 switch (this_genotype) {
820 case 'A': prob[locus][A]=1.0; break;
821 case 'B':
822 case 'H': prob[locus][H]=1.0; break;
823 case '-': prob[locus][A]=0.5; prob[locus][H]=0.5; break;
824 default: send(CRASH);
825 }
826 else if (raw.data_type==INTERCROSS && !raw.f3)
827 switch (this_genotype) {
828 case 'A': prob[locus][A]=1.0; break;
829 case 'B': prob[locus][B]=1.0; break;
830 case 'H': prob[locus][H]=1.0; break;
831 case 'C': prob[locus][H]=2.0/3.0; prob[locus][B]=1.0/3.0; break;
832 case 'D': prob[locus][H]=2.0/3.0; prob[locus][A]=1.0/3.0; break;
833 case '-': prob[locus][A]=0.25; prob[locus][B]=0.25;
834 prob[locus][H]=0.5; break;
835 default: send(CRASH);
836 }
837 else if (raw.data_type==INTERCROSS && raw.f3)
838 switch (this_genotype) {
839 case 'A': prob[locus][A]=1.0; break;
840 case 'B': prob[locus][B]=1.0; break;
841 case 'H': prob[locus][H]=1.0; break;
842 case 'C': prob[locus][H]=0.4; prob[locus][B]=0.6; break;
843 case 'D': prob[locus][H]=0.4; prob[locus][A]=0.6; break;
844 case '-': prob[locus][A]=3.0/8.0; prob[locus][B]=3.0/8.0;
845 prob[locus][H]=0.25; break;
846 default: send(CRASH);
847 }
848 else send(CRASH);
849 }
850
pheno_given_geno(observation,genotype)851 real pheno_given_geno(observation,genotype)
852 char observation; /* eg the 'phenotype' */
853 int genotype;
854 {
855 if (raw.data_type==BACKCROSS)
856 switch (observation) {
857 case 'A': return((genotype==A) ? 1.0 : 0.0);
858 case 'B':
859 case 'H': return((genotype==H) ? 1.0 : 0.0);
860 case '-': return(1.0);
861 default: send(CRASH);
862 }
863 else if (raw.data_type==INTERCROSS && !raw.f3)
864 switch (observation) {
865 case 'A': return((genotype==A) ? 1.0 : 0.0);
866 case 'B': return((genotype==B) ? 1.0 : 0.0);
867 case 'H': return((genotype==H) ? 1.0 : 0.0);
868 case 'C': return((genotype!=A) ? 1.0 : 0.0);
869 case 'D': return((genotype!=B) ? 1.0 : 0.0);
870 case '-': return(1.0);
871 default: send(CRASH);
872 }
873 else if (raw.data_type==INTERCROSS && raw.f3) /* no different??? */
874 switch (observation) {
875 case 'A': return((genotype!=B) ? 1.0 : 0.0);
876 case 'B': return((genotype!=A) ? 1.0 : 0.0);
877 case 'H': return((genotype==H) ? 1.0 : 0.0);
878 case 'C': return((genotype!=A) ? 1.0 : 0.0);
879 case 'D': return((genotype!=B) ? 1.0 : 0.0);
880 case '-': return(1.0);
881 default: send(CRASH);
882 }
883 else send(CRASH);
884 }
885
886
condition(prob,locus,prev_locus,observation,rec_frac,side)887 void condition(prob,locus,prev_locus,observation,rec_frac,side)
888 LOCUS_GENOTYPE_PROBS *prob;
889 int locus, prev_locus;
890 char observation;
891 real rec_frac;
892 int side;
893 {
894 int i, geno_was, geno_is, max_recs,geno1,geno2;
895 real rec, norec, total;
896
897 for (i=0; i<MAX_LOCUS_GENOTYPES; i++) prob[locus][i]=0.0;
898 rec= rec_frac; norec= 1.0-rec_frac;
899 total= 0.0;
900
901 for_locus_genotypes(raw.data_type,geno_was)
902 for_locus_genotypes(raw.data_type,geno_is) {
903 geno1 = (side==LEFT) ? geno_was:geno_is;
904 geno2 = (side==LEFT) ? geno_is :geno_was;
905 prob[locus][geno_is]+=
906 prob[prev_locus][geno_was] *
907 transition_prob(raw.data_type,geno1,geno2,rec_frac);
908 }
909 for_locus_genotypes(raw.data_type,geno_is)
910 total += prob[locus][geno_is] *= pheno_given_geno(observation,geno_is);
911 for_locus_genotypes(raw.data_type,geno_is) prob[locus][geno_is]/=total;
912 }
913
914
transition_prob(data_type,geno_was,geno_is,theta)915 real transition_prob(data_type,geno_was,geno_is,theta)
916 int data_type, geno_was, geno_is;
917 real theta;
918 {
919 INTERVAL_GENOTYPE_PROBS prob;
920 real x, y, z, sum;
921 real C2, D2, E2, F2, G2, C3, D3, E3, F3, G3;
922
923 if (data_type==BACKCROSS) {
924 if (geno_was==geno_is) return(1.0-theta);
925 else return(theta);
926
927 } else if (data_type==INTERCROSS && !raw.f3) {
928 switch(geno_was) {
929 case A: switch(geno_is) {
930 case A: return(sq(1.0-theta));
931 case B: return(sq(theta));
932 case H: return(2.0*theta*(1.0-theta));
933 }
934 case B: switch(geno_is) {
935 case A: return(sq(theta));
936 case B: return(sq(1.0-theta));
937 case H: return(2.0*theta*(1.0-theta));
938 }
939 case H: switch(geno_is) {
940 case A: return(theta*(1.0-theta));
941 case B: return(theta*(1.0-theta));
942 case H: return(sq(theta)+sq(1.0-theta));
943 }
944 }
945
946 } else if (data_type==INTERCROSS && raw.f3) { /* haldane & waddington */
947 /* OK BONEHEAD - Why did I put I's in here??? */
948 C2= 0.5*sq(1.0-theta);
949 D2= 0.5*sq(theta);
950 E2= theta*(1.0-theta);
951 F2= sq(1.0-theta);
952 G2= sq(theta);
953
954 C3= C2 + 0.5*E2 + 0.25*sq(1.0-theta)*F2 + 0.25*sq(theta)*G2;
955 D3= D2 + 0.5*E2 + 0.25*sq(theta)*F2 + 0.25*sq(1.0-theta)*G2;
956 E3= 0.5*E2 + 0.5*theta*(1.0-theta)*(F2 + G2);
957 F3= 0.5*sq(1.0-theta)*F2 + 0.5*sq(theta)*G2;
958 G3= 0.5*sq(theta)*F2 + 0.5*sq(1.0-theta)*G2;
959
960 switch(geno_was) {
961 case A:
962 sum= C3 + D3 + E3;
963 switch(geno_is) {
964 case A: return(C3/sum);
965 case B: return(D3/sum);
966 case H: case I: return(0.5*(E3/sum));
967 default: send(CRASH);
968 }
969 case B:
970 sum= C3 + D3 + E3;
971 switch(geno_is) {
972 case B: return(C3/sum);
973 case A: return(D3/sum);
974 case H: case I: return(0.5*(E3/sum));
975 default: send(CRASH);
976 }
977 case H: case I:
978 sum= 2.0*E3 + F3 + G3;
979 switch(geno_is) {
980 case A: return(E3/sum);
981 case B: return(E3/sum);
982 case H: case I:
983 return( (geno_was==geno_is ? F3:G3)/sum);
984 default: send(CRASH);
985 }
986 default: send(CRASH);
987 }
988
989 #ifdef ANOTHER_SHOT
990 } else if (data_type==INTERCROSS && raw.f3) { /* new look */
991 x= sq(1.0-theta); y=theta*(1.0-theta); z=sq(theta);
992 switch(geno_was) {
993 case A:
994 switch(geno_is) {
995 case A: return((1.0/3.0)*(sq(x) + sq(z) + 2.0*y + 2.0*x));
996 case B: return((2.0/3.0)*(x*z + y + z));
997 case H: return((2.0/3.0)*(y*x + y*z + y));
998 default: send(CRASH);
999 }
1000 case B:
1001 switch (geno_is) {
1002 case B: return((1.0/3.0)*(sq(x) + sq(z) + 2.0*y + 2.0*x));
1003 case A: return((2.0/3.0)*(x*z + y + z));
1004 case H: return((2.0/3.0)*(y*x + y*z + y));
1005 default: send(CRASH);
1006 }
1007 case H:
1008 switch(geno_is) {
1009 case A: return(y*(1.0+x+z));
1010 case B: return(y*(1.0+x+z));
1011 case H: return(x*(x+z) + z*(x+z));
1012 default: send(CRASH);
1013 }
1014 default: send(CRASH);
1015 }
1016 #endif
1017 #ifdef YET_ANOTHER
1018 } else if (data_type==INTERCROSS && raw.f3) { /* From Transition Matrix */
1019 x= sq(1.0-theta); y=theta*(1.0-theta); z=sq(theta);
1020 sum=0.0;
1021 switch(geno_was) {
1022 case A:
1023 sum+= prob[AA]= 0.25*x + 0.25*y + 0.125*x*x +0.125*z*z;
1024 sum+= prob[AH]= 0.25*y + 0.25*x*y + 0.25*z*y;
1025 sum+= prob[AB]= 0.25*y + 0.25*z + 0.25*x*z;
1026 switch(geno_is) {
1027 case A: return(prob[AA]/sum);
1028 case H: return(prob[AH]/sum);
1029 case B: return(prob[AB]/sum);
1030 default: send(CRASH);
1031 }
1032 case H:
1033 sum+= prob[HA]= 0.25*y + 0.25*x*y + 0.25*z*y;
1034 sum+= prob[HH]= 0.25*x*x + 0.25*z*z + 0.25*x*z + 0.25*z*x;
1035 sum+= prob[HB]= 0.25*y + 0.25*x*y + 0.25*z*y;
1036 switch(geno_is) {
1037 case A: return(prob[HA]/sum);
1038 case H: return(prob[HH]/sum);
1039 case B: return(prob[HB]/sum);
1040 default: send(CRASH);
1041 }
1042 case B:
1043 sum+= prob[BB]= 0.25*x + 0.25*y + 0.125*x*x +0.125*z*z;
1044 sum+= prob[BH]= 0.25*y + 0.25*x*y + 0.25*z*y;
1045 sum+= prob[BA]= 0.25*y + 0.25*z + 0.25*x*z;
1046 switch(geno_is) {
1047 case A: return(prob[BA]/sum);
1048 case H: return(prob[BH]/sum);
1049 case B: return(prob[BB]/sum);
1050 default: send(CRASH);
1051 }
1052 default: send(CRASH);
1053 }
1054 #endif
1055 } else send(CRASH);
1056
1057 }
1058
1059
map_length(left,right)1060 real map_length(left,right) /* return rf dist from left to right */
1061 int left, right; /* Assumes check_interval() has succeeded */
1062 {
1063 real total_cm, total_rf;
1064 int i,temp;
1065
1066
1067 /* if (zero_interval(left,right)) return(MIN_REC_FRAC); */
1068 if (right==INF_LOCUS) return(MAX_REC_FRAC);
1069 if (left+1==right) return(raw.map_dist[left]);
1070
1071 for (i=left, total_cm=0.0; i<right; i++) {
1072 if (raw.map_dist[i]>MAX_REC_FRAC) return(MAX_REC_FRAC);
1073 else total_cm+= haldane_cm(raw.map_dist[i]);
1074 if (total_cm>MAX_CM) return(MAX_REC_FRAC);
1075 }
1076 total_rf= unhaldane_cm(total_cm);
1077 rrange(&total_rf,MIN_REC_FRAC,MAX_REC_FRAC);
1078 return(total_rf);
1079 }
1080
1081
indiv_interval_probs(prob,data_indiv_num,raw_indiv_num,left_locus_num,right_locus_num,theta)1082 void indiv_interval_probs(prob,data_indiv_num,raw_indiv_num,left_locus_num,
1083 right_locus_num,theta)
1084 INTERVAL_GENOTYPE_PROBS *prob; /* [num][geno-code] => real */
1085 int data_indiv_num, raw_indiv_num;
1086 int left_locus_num, right_locus_num;
1087 real theta; /* provided as an argument for efficiency */
1088 {
1089 real sum;
1090 int interval_geno, left, right;
1091
1092 sum= 0.0;
1093 for_interval_genotypes(raw.data_type,interval_geno) {
1094 left= left_genotype[interval_geno];
1095 right=right_genotype[interval_geno];
1096 sum+= prob[data_indiv_num][interval_geno]=
1097 raw.left_cond_prob[raw_indiv_num][left_locus_num][left] *
1098 transition_prob(raw.data_type,left,right,theta) *
1099 raw.right_cond_prob[raw_indiv_num][right_locus_num][right];
1100 }
1101
1102 for_interval_genotypes(raw.data_type,interval_geno) {
1103 if(sum == 0.0) print("sum = 0\n");
1104 prob[data_indiv_num][interval_geno]/= sum;
1105 }
1106 }
1107
1108
apriori_prob(data_type,geno)1109 real apriori_prob(data_type,geno)
1110 int data_type, geno;
1111 {
1112 if (data_type==BACKCROSS) return(0.5);
1113
1114 else switch(geno) {
1115 case A: return(0.25);
1116 case B: return(0.25);
1117 case H: return(0.50);
1118 }
1119 }
1120
1121
altered_chroms_message()1122 void altered_chroms_message()
1123 {
1124 if(!already_printed) {
1125 print("The linkage map data in the '.data' file has been altered since its\n");
1126 print("last usage. The saved scan results and saved names are obsolete and\n");
1127 print("will not be loaded.\n");
1128 already_printed = TRUE;
1129 }
1130 altered_chroms = TRUE;
1131 }
1132
1133