1 /******************************************************************************
2
3 ##### # # #### #### # # ##### #### ####
4 # # # # # # # ## ## # # # # #
5 # # # # # # # ## # # # #### #
6 # # ## # # # # # # # # # ### #
7 # ## ## # # # # # # # # # # ### # #
8 # # # #### ####### #### # # ##### #### ### ####
9
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12 for Biomedical Research. All rights reserved. See READ.ME for license. */
13
14 #define INC_LIB
15 #define INC_SHELL
16 #define INC_MISC
17 #include "mapm.h"
18
19 /* internal procedures */
20
21 bool print_biglod();
22 void print_lodtable();
23 #define PRINT_HALF_LODTABLE 1
24 #define PRINT_FULL_LODTABLE 2
25 void print_2pt_criteria();
26 void parse_2pt_criteria();
27 void print_lod_line();
28
print_2pt_criteria(str,lod,theta)29 void print_2pt_criteria(str,lod,theta)
30 char *str;
31 real lod, theta;
32 { sf(ps,"%s at min LOD %.2lf, max Distance %s",str,lod,rag(rf2str(theta)));
33 pr(); }
34
parse_2pt_criteria(argptr,lod,theta)35 void parse_2pt_criteria(argptr,lod,theta)
36 char **argptr;
37 real *lod, *theta;
38 {
39 if (!rtoken(argptr,default_lod,lod) || !rrange(lod,0.0,1000.0))
40 error("bad value for min LOD score");
41 if (!rtoken(argptr,default_theta,theta) || !input_dist(theta))
42 error("bad value for max distance");
43 }
44
45
two_point()46 command two_point()
47 {
48 int a, b, num_loci, count, total, *locus=NULL;
49
50 mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL);
51 nomore_args(0);
52
53 run {
54 count=total=0;
55 alloc_list_of_all_loci(seq,&locus,&num_loci);
56 crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
57 total=(num_loci*(num_loci-1))/2;
58
59 for_all_locus_pairs (locus,num_loci,a,b) {
60 keep_user_amused("pair",count++,total);
61 compute_two_pt(a,b);
62 }
63 print("two-point data are available.\n");
64
65 } on_exit {
66 unarray(locus,int);
67 relay_messages;
68 }
69 }
70
71
72 #define GROUP_DIVIDER "-------\n"
73
group()74 command group()
75 {
76 real lodbound, thetabound;
77 int *loci=NULL, num_loci, *linkage_group=NULL, group_size, num_left;
78 int *really_unlinked=NULL, num_unlinked, num_linkage_groups, i;
79
80 mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL);
81 parse_2pt_criteria(&args,&lodbound,&thetabound);
82 nomore_args(2);
83
84 run {
85 alloc_list_of_all_loci(seq,&loci,&num_loci);
86 crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
87 array(linkage_group,num_loci,int);
88 array(really_unlinked,num_loci,int);
89 print_2pt_criteria("Linkage Groups",lodbound,thetabound); nl();
90
91 num_groups= 0; /* The global - set to 0 just in case something dies */
92 num_linkage_groups= num_unlinked= 0;
93 for (i=0; i<num_loci; i++) my_group[i]=NO_GROUP;
94 num_left= num_loci;
95
96 do {
97 get_linkage_group(loci,&num_left,linkage_group,&group_size,
98 lodbound,thetabound);
99 if (group_size>=2) {
100 inv_isort(linkage_group,group_size);
101 if (num_linkage_groups>0) print(GROUP_DIVIDER); else nl();
102 sf(ps,"group%d= ",num_linkage_groups+1); pr();
103 for (i=0; i<group_size; i++) {
104 print(rag(loc2str(linkage_group[i]))); print(" ");
105 my_group[linkage_group[i]]= num_linkage_groups;
106 }
107 nl();
108 num_linkage_groups++;
109 } else {
110 /* it is already in NO_GROUP */
111 really_unlinked[num_unlinked++]= linkage_group[0];
112 }
113 } while (num_left>0);
114
115 if (num_unlinked>0) {
116 if (num_linkage_groups>0) print(GROUP_DIVIDER); else nl();
117 print("unlinked= ");
118 for (i=0; i<num_unlinked; i++) {
119 print(rag(loc2str(really_unlinked[i]))); print(" ");
120 my_group[really_unlinked[i]]= num_linkage_groups;
121 }
122 nl();
123 }
124 num_groups= num_linkage_groups+1; /* last one is "unlinked" group */
125
126 } on_exit {
127 unarray(linkage_group,int);
128 unarray(really_unlinked,int);
129 unarray(loci,int);
130 relay_messages;
131 }
132 }
133
134
135 #define HAP_DEL_NOTE \
136 "note: Deleting halplotype groups containing %s markers!\n"
137 #define HAP_OFF_NOTE "'join haplotypes' is off.\n"
138 #define HAP_ON_NOTE "'join haplotypes' is on.\n"
139 #define HAP_NO_GROUPS "No linkage groups found.\n"
140 #define HAP_NO_HAPLOS "No appropriate haplotype groups found.\n"
141
haplotype()142 command haplotype()
143 {
144 real lodbound, thetabound;
145 int *loci=NULL, num_loci, num_left, *linkage_group=NULL, group_size;
146 int *haplo_group=NULL, num_haplo, num_linkage_groups, num_haplo_groups;
147 int *changed=NULL, num_changed, *obs1=NULL, *obs2=NULL, i;
148 bool find_em, any;
149 char first[TOKLEN+1];
150
151 mapm_ready(F2_DATA,2,UNCRUNCHED_LIST,&num_loci);
152 get_arg(stoken,"",first);
153 if (streq(first,"on")) {
154 find_em=TRUE;
155 parse_2pt_criteria(&args,&lodbound,&thetabound);
156 print_2pt_criteria("Haplotyping",lodbound,thetabound);
157 print("\nHaplotype groups allow NO Obligate Recombinants.\n");
158 } else if (streq(first,"off")) {
159 find_em=FALSE;
160 } else if (nullstr(args)) {
161 for (any=FALSE, i=0; i<raw.num_markers; i++)
162 if (haplotyped(i)) { any=TRUE; break; }
163 if (any) print(HAP_ON_NOTE); else print(HAP_OFF_NOTE);
164 return;
165 } else usage_error(1);
166 nomore_args(0);
167
168 run {
169 array(loci,num_loci,int);
170 array(linkage_group,num_loci,int);
171 array(haplo_group,num_loci,int);
172 array(obs1,raw.data.f2.num_indivs,int);
173 array(obs2,raw.data.f2.num_indivs,int);
174 array(changed,2*raw.num_markers,int);
175 get_list_of_all_loci(seq,loci,&num_loci,num_loci); /* no crunching */
176
177 num_linkage_groups= num_haplo_groups= num_changed= 0;
178 if (!find_em) {
179 num_loci=raw.num_markers;
180 for (i=0; i<raw.num_markers; i++) loci[i]=i;
181 }
182 if (delete_haplo_groups(loci,num_loci,changed,&num_changed)) {
183 sf(ps,HAP_DEL_NOTE,(num_loci==raw.num_markers ? "all":"these"));
184 pr();
185 }
186 if (find_em) {
187 nl(); num_left=num_loci;
188 do {
189 get_linkage_group(loci,&num_left,linkage_group,&group_size,
190 lodbound,thetabound);
191 if (group_size>=2) {
192 sort_loci(linkage_group,group_size);
193 do {
194 find_haplo_group(linkage_group,&group_size,haplo_group,
195 &num_haplo,obs1,obs2);
196 if (num_haplo>=2) {
197 setup_haplo_group(haplo_group,num_haplo);
198 sf(ps,"Haplotype Group %2d: %s= [",
199 ++num_haplo_groups,
200 locname(haplo_first[haplo_group[0]],TRUE));
201 pr();
202 for (i=0; i<num_haplo; i++) {
203 if (i!=0) print(" ");
204 print(rag(locname(haplo_group[i],FALSE)));
205 changed[num_changed++]=haplo_group[i];
206 }
207 print("]\n");
208 } /* num_haplo>2 */
209 } while(group_size>=1);
210 num_linkage_groups++;
211 } /* group_size>2 */
212 } while (num_left>0);
213 nl();
214 if (num_linkage_groups==0) print(HAP_NO_GROUPS);
215 else if (num_haplo_groups==0) print(HAP_NO_HAPLOS);
216 }
217 if (num_changed>0) {
218 if (!find_em) nl();
219 sort_loci(changed,num_changed);
220 bash_mapping_data(changed,num_changed);
221 bash_order_info(changed,num_changed);
222 }
223 for (any=FALSE, i=0; i<raw.num_markers; i++)
224 if (haplotyped(i)) {any=TRUE; break; }
225 if (any) print(HAP_ON_NOTE); else print(HAP_OFF_NOTE);
226
227 } on_exit {
228 unarray(linkage_group,int);
229 unarray(haplo_group,int);
230 unarray(loci,int);
231 unarray(changed,int);
232 unarray(obs1,int);
233 unarray(obs2,int);
234 relay_messages;
235 }
236 }
237
238
unhaplotype()239 command unhaplotype()
240 {
241 int *loci=NULL, num_loci, *changed=NULL, num_changed;
242
243 mapm_ready(F2_DATA,0,0,NULL);
244 run {
245 if (nullstr(args)) usage_error(1);
246 parse_locus_args(&loci,&num_loci); /* error if fails, is uncrunched */
247 array(changed,2*raw.num_markers,int);
248
249 if (delete_haplo_groups(loci,num_loci,changed,&num_changed))
250 { sf(ps,HAP_DEL_NOTE,"these"); pr(); }
251 else print("No haplotype groups to delete\n");
252 if (num_changed>0) {
253 sort_loci(changed,num_changed);
254 bash_mapping_data(changed,num_changed);
255 bash_order_info(changed,num_changed);
256 }
257 } on_exit {
258 unarray(loci,int);
259 unarray(changed,int);
260 relay_messages;
261 }
262 }
263
264
list_haplotypes()265 command list_haplotypes()
266 {
267 int num_loci, *locus=NULL;
268
269 mapm_ready(ANY_DATA,MAYBE_SEQ,UNCRUNCHED_LIST,NULL);
270 run {
271 if (!nullstr(args)) {
272 parse_locus_args(&locus,&num_loci); /* error if fails */
273 if (num_loci==0) error("no loci in arguments\n");
274 } else {
275 if (!alloc_list_of_all_loci(seq,&locus,&num_loci))
276 error(NEED_SEQ_OR_ARGS);
277 }
278 nl();
279 crunch_locus_list(locus,&num_loci,SILENTLY,ANY_CHROMS,0);
280 hold(more_mode) print_haplo_summary(locus,num_loci);
281
282 } on_exit {
283 unarray(locus,int);
284 relay_messages;
285 }
286 }
287
288
list_loci()289 command list_loci()
290 {
291 int source, num_loci, *locus=NULL;
292
293 mapm_ready(ANY_DATA,MAYBE_SEQ,UNCRUNCHED_LIST,NULL);
294 run {
295 if (!nullstr(args)) {
296 parse_locus_args(&locus,&num_loci); /* error if fails */
297 if (num_loci==0) error("no loci in arguments\n");
298 source=IN_ARGS;
299 } else {
300 if (!alloc_list_of_all_loci(seq,&locus,&num_loci))
301 error(NEED_SEQ_OR_ARGS);
302 source=IN_SEQ;
303 }
304 crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,source);
305 nl();
306 hold(more_mode) print_locus_summary(locus,num_loci,use_haplotypes);
307
308 } on_exit {
309 unarray(locus,int);
310 relay_messages;
311 }
312 }
313
314
315 #define NOSEX_BIGLODS "\n Marker-1 Marker-2 Theta LOD cM\n"
316 #define SEX_BIGLODS \
317 "\n Marker-1 Marker-2 Theta(M) Theta(F) LOD cM(M) cM(F)\n"
318
biglods()319 command biglods()
320 {
321 int a, b, n, num_loci, *loci=NULL;
322 real lodbound, thetabound;
323
324 mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL);
325 parse_2pt_criteria(&args,&lodbound,&thetabound);
326 nomore_args(2);
327
328 run {
329 alloc_list_of_all_loci(seq,&loci,&num_loci);
330 crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
331 print_2pt_criteria("Linked Marker Pairs",lodbound,thetabound); nl();
332 if (!sex_specific) print(NOSEX_BIGLODS); else print(SEX_BIGLODS);
333 n=0;
334 hold(more_mode) for_all_locus_pairs(loci,num_loci,a,b)
335 if (a!=b &&
336 print_biglod(a,b,lodbound,thetabound,sex_specific,NO_CHROM)) n++;
337 if (n==0) print("no linked markers in sequence\n");
338 } on_exit {
339 unarray(loci,int);
340 relay_messages;
341 }
342 }
343
344
near_locus()345 command near_locus()
346 {
347 int a, b, i, j, n, num_loci, *locus=NULL, *trial_locus=NULL, num_trials;
348 real lodbound, thetabound;
349 char *rest;
350
351 mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL);
352 if (split_arglist(&rest,':') && !nullstr(rest)) {
353 parse_2pt_criteria(&rest,&lodbound,&thetabound);
354 if (!nullstr(rest)) usage_error(0);
355 } else { lodbound=default_lod; thetabound=default_theta; }
356 if (nullstr(args)) usage_error(0);
357 parse_locus_args(&trial_locus,&num_trials);
358 crunch_locus_list(trial_locus,&num_trials,CRUNCH_WARNINGS,ANY_CHROMS,
359 IN_ARGS);
360
361 run {
362 alloc_list_of_all_loci(seq,&locus,&num_loci);
363 crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
364 print_2pt_criteria("Linked Markers in Sequence",lodbound,thetabound);
365 nl();
366 if (!sex_specific) print(NOSEX_BIGLODS); else print(SEX_BIGLODS);
367
368 hold(more_mode) for (j=0; j<num_trials; j++) {
369 a=trial_locus[j]; n=0;
370 for (i=0; i<num_loci; i++) {
371 b=locus[i]; if (a==b) continue;
372 if (print_biglod(a,b,lodbound,thetabound,sex_specific,
373 assignment_chrom(locus[i]))) n++;
374 }
375 if (n==0) { sf(ps," %-10s no linked markers\n",loc2str(a)); pr(); }
376 }
377 } on_exit {
378 unarray(trial_locus,int);
379 unarray(locus,int);
380 relay_messages;
381 }
382 }
383
384
near_chrom()385 command near_chrom() /* should move to auto_cmd.c? */
386 {
387 int a, b, i, n, num_loci, *locus=NULL, chrom;
388 real lodbound, thetabound;
389 char str[MAXLINE+1];
390
391 mapm_ready(ANY_DATA,2,LIST_SEQ,&num_loci);
392 chrom=get_chrom_arg(TRUE);
393 parse_2pt_criteria(&args,&lodbound,&thetabound);
394 nomore_args(2);
395
396 run {
397 alloc_list_of_all_loci(seq,&locus,&num_loci);
398 crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
399 if (chrom==NO_CHROM) strcpy(str,"Linked Markers in Data Set");
400 else sf(str,"Linked Markers on Chromosome %s",chrom2str(chrom));
401 print_2pt_criteria(str,lodbound,thetabound); nl();
402 if (!sex_specific) print(NOSEX_BIGLODS); else print(SEX_BIGLODS);
403
404 hold(more_mode) for (i=0; i<num_loci; i++) {
405 a=locus[i]; n=0;
406 for (b=0; b<raw.num_markers; b++)
407 if ((!use_haplotypes || !haplotype_subordinate(b)) &&
408 (chrom==NO_CHROM || assigned_to(b,chrom)))
409 if (a!=b &&
410 print_biglod(a,b,lodbound,thetabound,sex_specific,
411 assignment_chrom(b)))
412 n++;
413 if (n==0) { sf(ps," %-10s no linked markers\n",loc2str(a)); pr(); }
414 }
415 } on_exit {
416 unarray(locus,int);
417 relay_messages;
418 }
419 }
420
421
lodtable()422 command lodtable()
423 {
424 int *loci=NULL, num_loci, type, source;
425 char name[TOKLEN+1];
426
427 mapm_ready(ANY_DATA,MAYBE_SEQ,UNCRUNCHED_LIST,&num_loci);
428 get_one_arg(stoken,"half",name);
429 if (matches(name,"half")) type=PRINT_HALF_LODTABLE;
430 else if (matches(name,"full")) type=PRINT_FULL_LODTABLE;
431 else usage_error(1);
432
433 run {
434 if (!nullstr(args)) {
435 parse_locus_args(&loci,&num_loci); /* error if fails */
436 if (num_loci==0) error("no loci in arguments\n");
437 source=IN_ARGS;
438 } else {
439 if (!alloc_list_of_all_loci(seq,&loci,&num_loci))
440 error(NEED_SEQ_OR_ARGS);
441 source=IN_SEQ;
442 }
443 crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,source);
444 hold(more_mode) print_lodtable(loci,loci,num_loci,num_loci,type);
445
446 } on_exit {
447 unarray(loci,int);
448 relay_messages;
449 }
450 }
451
452
pairwise()453 command pairwise()
454 {
455 int i, j, a, b, num_trials, *try_me=NULL, num_loci, *loci=NULL;
456 bool sex;
457
458 mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,&num_loci);
459 run {
460 if (nullstr(args)) usage_error(0);
461 parse_locus_args(&try_me,&num_trials);
462 crunch_locus_list(try_me,&num_trials,CRUNCH_WARNINGS,ANY_CHROMS,
463 IN_ARGS);
464 alloc_list_of_all_loci(seq,&loci,&num_loci);
465 crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ);
466
467 hold(more_mode) print_lodtable(try_me,loci,num_trials,num_loci,
468 PRINT_FULL_LODTABLE);
469 } on_exit {
470 unarray(try_me,int);
471 unarray(loci,int);
472 relay_messages;
473 }
474 }
475
476
print_biglod(i,j,lodbound,thetabound,sex,chrom)477 bool print_biglod(i,j,lodbound,thetabound,sex,chrom)
478 int i, j;
479 real lodbound, thetabound;
480 bool sex;
481 int chrom;
482 {
483 real lod, theta, thetam, thetaf;
484
485 if (!sex) {
486 if (!get_two_pt(i,j,&lod,&theta)) return(FALSE);
487 if (lod>=lodbound && theta<=thetabound) {
488 sf(ps," %-10s %-10s %s %s %s",loc2str(i),loc2str(j),
489 rsd(5.2,theta),rsd(5.2,lod),rsd(6.2,cm_dist(theta))); pr();
490 if (chrom!=NO_CHROM) { sf(ps," (%s)",chrom2str(chrom)); pr(); }
491 nl();
492 return(TRUE);
493 } else return(FALSE);
494 } else { /* sex */
495 if (!get_sex_two_pt(i,j,&lod,&thetam,&thetaf)) return(FALSE);
496 if (lod>=lodbound && (thetam<=thetabound || thetaf<=thetabound)) {
497 sf(ps," %-10s %-10s %s %s %s %s %s",
498 loc2str(i),loc2str(j),
499 rsd(5.2,thetam),rsd(5.2,thetaf),rsd(5.2,lod),
500 cm_dist(thetam),cm_dist(thetaf)); pr();
501 if (chrom!=NO_CHROM) { sf(ps," (%s)",chrom2str(chrom)); pr(); }
502 nl();
503 return(TRUE);
504 } else return(FALSE);
505 }
506 }
507
508
print_lodtable(locus1,locus2,num_loci1,num_loci2,how)509 void print_lodtable(locus1,locus2,num_loci1,num_loci2,how)
510 int *locus1, *locus2, num_loci1, num_loci2, how;
511 {
512 int across, down, num_across, num_done, across_to_print;
513 char *line1, *line2;
514
515 print("Bottom number is LOD score, top number is ");
516 if (units==CENTIMORGANS) print("centimorgan distance:\n");
517 else print("recombination fraction:\n");
518
519 num_done=0;
520 do {
521 num_across= (num_loci1>10 ? 10:num_loci1);
522 across_to_print= num_loci1 - (how==PRINT_HALF_LODTABLE ? 1:0);
523
524 nl();
525 if (!print_names) {
526 print(" ");
527 for (across=0; across<num_across; across++) {
528 if (across!=across_to_print) {
529 sf(ps,"%s ",loc2str(locus1[across]));
530 print(ps);
531 }
532 }
533 } else {
534 /* hack in names to look nice but also fit as before */
535 line1 = get_temp_string();
536 line2 = get_temp_string();
537 sf(line1," ");
538 sf(line2," ");
539 for (across=0; across<num_across; across++) {
540 if (across!=across_to_print) {
541 if (across%2==0) {
542 sf(ps,"%s ",loc2str(locus1[across]));
543 strcat(line1,ps);
544 strcat(line2," ");
545 } else {
546 sf(ps,"%s",loc2str(locus1[across]));
547 strcat(line2,ps);
548 }
549 }
550 }
551 print(line1); nl();
552 print(line2);
553 }
554
555 nl();
556 for (down=0; down<num_loci2; down++)
557 print_lod_line(locus2[down],locus1,num_across,how,num_done,down);
558 nl();
559
560 num_loci1-= num_across;
561 num_done+= num_across;
562 if (num_loci1>0) locus1+=num_across;
563 } while (num_loci1>0);
564 }
565
566
print_lod_line(loc,toploc,topnum,how,topref,downref)567 void print_lod_line(loc,toploc,topnum,how,topref,downref)
568 int loc, *toploc, topnum, how, topref, downref;
569 {
570 int across, stop_at;
571 real lod, theta, thetam, thetaf;
572
573 if (how==PRINT_HALF_LODTABLE) {
574 if (topref >= downref) return;
575 stop_at= ((downref-topref)>topnum ? topnum:(downref - topref));
576 } else stop_at= topnum;
577
578 if (sex_specific) {
579 if (print_names) print("\n ");
580 else print("\n ");
581 for (across=0; across<stop_at; across++) {
582 if (sex_specific) {
583 if (get_sex_two_pt(loc,toploc[across],&lod,&thetam,&thetaf))
584 sf(ps," %s",rf2str(thetam));
585 else
586 sf(ps," ");
587 print(ps);
588 } else {
589 /* print for UNLINKED_TWO_PT Similarly, test all return values
590 from get_two_pt and get_sex_two_pt below */
591 }
592 }
593 }
594 sf(ps,"\n%s",loc2str(loc));
595 print(ps);
596 for (across=0; across<stop_at; across++) {
597 if (sex_specific) {
598 if (get_sex_two_pt(loc,toploc[across],&lod,&thetam,&thetaf))
599 sf(ps,"%s ",rf2str(thetaf));
600 else
601 sf(ps," - ");
602 } else {
603 if (get_two_pt(loc,toploc[across],&lod,&theta))
604 sf(ps,"%s ",rf2str(theta));
605 else
606 sf(ps," - ");
607 }
608 print(ps);
609 }
610 if (print_names) print("\n ");
611 else print("\n ");
612 for (across=0; across<stop_at; across++) {
613 if (sex_specific) {
614 if (get_sex_two_pt(loc, toploc[across], &lod, &thetam, &thetaf))
615 sf(ps," %s",rsd(5.2,lod));
616 else
617 sf(ps," ");
618 } else {
619 if (get_two_pt(loc,toploc[across],&lod,&theta))
620 sf(ps," %s",rsd(5.2,lod));
621 else
622 sf(ps," ");
623 }
624 print(ps);
625 }
626 nl();
627 }
628
629
630
631 #define THREE_NO_GRPS "no linkage groups of 3 or more loci found\n"
632 #define THREE_NO_TRIPS "no linked triplets found in %d linkage group%s\n"
633 #define THREE_DO_THIS "%d linked triplet%s in %d linkage group%s\n"
634 #define THREE_HEAD "Triplet criteria: LOD %.2lf, Max-Dist %s, #Linkages %d\n"
635 #define TRIPLET_SEX FALSE
636
three_point()637 command three_point()
638 {
639 int i, num_loci, *loci=NULL, num_trips, num_groups, num_links;
640 int *linkage_group=NULL, group_size, num_unlinked, three_locus[3], foo;
641 SEQ_NODE *three_seq;
642 MAP *map=NULL;
643 real three_like[3], lodbound2, thetabound2;
644 real lodbound3, thetabound3;
645
646 mapm_ready(ANY_DATA,3,LIST_SEQ,&num_loci);
647 nomore_args(0);
648
649 /* these are the defaults */
650 lodbound2= default_lod; thetabound2= default_theta;
651 lodbound3= triplet_lod; thetabound3= triplet_theta;
652 num_links= triplet_num_links;
653
654 print_2pt_criteria("Linkage Groups",lodbound2,thetabound2); nl();
655 sf(ps,THREE_HEAD,lodbound3,rag(rf2str(thetabound3)),num_links); pr();
656 if (triplet_error_rate==0.0) print("'triple error detection' is off.\n");
657 else if (triplet_error_rate==LOCUS_ERROR_RATE)
658 print("'triple error detection' is on.\n");
659 else {
660 print("'triple error detection' in three-point analysis is on.\n");
661 sf(ps,"'error probability' for all loci is fixed at %.2lf%%.\n",
662 triplet_error_rate*100.0);
663 }
664 /* add: args! (what should they be?) pre-allocating 3pt, use_3pt=off msg,
665 smart recalc, keep user amused, print thresholds in banner */
666
667
668 run {
669 alloc_list_of_all_loci(seq,&loci,&num_loci); /* may delete haps! */
670 array(linkage_group,num_loci,int);
671 map= allocate_map(3);
672
673 print("counting..."); flush();
674 num_trips= num_groups= 0;
675 num_unlinked= num_loci;
676 do {
677 get_linkage_group(loci,&num_unlinked,linkage_group,&group_size,
678 lodbound2,thetabound2);
679 if (group_size>=3) {
680 num_groups++;
681 /* compute acceptable three-point orders in the group */
682 sort_loci(linkage_group,group_size);
683 for_all_3pt_seqs (linkage_group,group_size,three_seq) {
684 get_list_of_all_loci(three_seq,three_locus,&foo,3);
685 if (three_linked(three_locus,lodbound3,thetabound3,
686 num_links,TRIPLET_SEX)) num_trips++;
687 }
688 }
689 } while(num_unlinked>0);
690
691 if (num_groups==0) { sf(ps,THREE_NO_GRPS); pr(); abort_command(); }
692 else if (num_trips==0) {
693 sf(ps,THREE_NO_TRIPS,num_groups,maybe_s(num_groups)); pr();
694 abort_command();
695 } /* else */
696 sf(ps,THREE_DO_THIS,num_trips,maybe_s(num_trips),num_groups,
697 maybe_s(num_groups)); pr();
698
699 if (!print_names) {
700 print("\n log-likelihood differences\n");
701 print(" count markers a-b-c b-a-c a-c-b\n");
702 /* 12345: 1234 1234 1234 */
703 } else {
704 print("\n log-likelihood differences\n");
705 print(" count markers a-b-c b-a-c a-c-b\n");
706 /* 12345: 123456789 123456789 123456789 */
707 }
708
709 /* do it again, for real this time... */
710 get_list_of_all_loci(seq,loci,&num_unlinked,num_loci);
711 crunch_locus_list(loci,&num_loci,SILENTLY,ANY_CHROMS,IN_SEQ);
712 num_trips=0;
713 do {
714 get_linkage_group(loci,&num_unlinked,linkage_group,&group_size,
715 lodbound2,thetabound2);
716 if (group_size>=3) {
717 /* compute acceptable three-point orders in the group */
718 inv_isort(linkage_group,group_size);
719 for_all_3pt_seqs (linkage_group,group_size,three_seq) {
720 get_list_of_all_loci(three_seq,three_locus,&foo,3);
721 if (three_linked(three_locus,lodbound3,thetabound3,
722 num_links,TRIPLET_SEX)) {
723 compute_3pt(three_seq,TRIPLET_SEX,triplet_error_rate,
724 three_like,map);
725 if (!print_names) {
726 sf(ps,"%5d: %d %d %d",++num_trips,
727 three_locus[0]+1,three_locus[1]+1,
728 three_locus[2]+1);
729 pad_to_len(ps,23); pr();
730 } else {
731 sf(ps,"%5d: %s %s %s",++num_trips,
732 raw.locus_name[three_locus[0]],
733 raw.locus_name[three_locus[1]],
734 raw.locus_name[three_locus[2]]);
735 pad_to_len(ps,38); pr();
736 }
737 sf(ps,"%s %s %s\n",rsd(6.2,three_like[0]),
738 rsd(6.2,three_like[1]),rsd(6.2,three_like[2]));
739 pr();
740 }
741 }
742 }
743 } while(num_unlinked>0);
744
745 } on_exit {
746 free_map(map);
747 unarray(loci,int);
748 unarray(linkage_group,int);
749 three_pt_touched= TRUE;
750 relay_messages;
751 }
752 }
753
754
forget_three_point()755 command forget_three_point()
756 {
757 mapm_ready(ANY_DATA,0,0,NULL);
758 nomore_args(0);
759 bash_all_three_pt(raw.num_markers);
760 print("All three-point data forgotten.\n");
761 }
762
763
764 #define INF_CRITERIA \
765 "Informativeness: min #Individuals %d%s, min Distance %s\n"
766
suggest_subset()767 command suggest_subset()
768 {
769 real lodbound, thetabound, seed_like;
770 int *loci=NULL, num_loci, *linkage_group=NULL, group_size, groups_done;
771 int *subset=NULL, subset_size, num_unlinked, i, prev;
772
773 mapm_ready(F2_DATA,3,LIST_SEQ,&num_loci); /* F2 because #indivs */
774 parse_2pt_criteria(&args,&lodbound,&thetabound);
775 nomore_args(0);
776
777 print_2pt_criteria("Informative Subgroups",lodbound,thetabound); nl();
778 sf(ps,INF_CRITERIA,npt_min_indivs,(npt_codominant ? " (codominant)":""),
779 rag(rf2str(npt_min_theta))); pr();
780
781 run {
782 alloc_list_of_all_loci(seq,&loci,&num_loci);
783 array(linkage_group,num_loci,int);
784 array(subset,num_loci,int);
785
786 num_orders=0; /* global - deletes all pre-existing orders */
787 for (i=0; i<raw.num_markers; i++) order_next[i]=NO_LOCUS;
788
789 num_unlinked=num_loci;
790 groups_done=0;
791 nl();
792 do {
793 get_linkage_group(loci,&num_unlinked,linkage_group,&group_size,
794 lodbound,thetabound);
795 if (group_size>=3) {
796 if (groups_done!=0) print(GROUP_DIVIDER);
797 sort_loci(linkage_group,group_size);
798 groups_done++;
799 sf(ps,"Linkage group %d: ",groups_done); pr();
800 for (i=0; i<group_size; i++) {
801 print(rag(loc2str(linkage_group[i])));
802 if (i!=group_size-1) print(" ");
803 } nl();
804
805 informative_subset(linkage_group,group_size,
806 npt_min_indivs,npt_min_theta,npt_codominant,
807 use_haplotypes,subset,&subset_size);
808
809 if (subset_size<1)
810 print("No informative markers.\n");
811 else if (subset_size==group_size)
812 print("All markers are informative.\n");
813 else {
814 sf(ps,"%d Marker%s in informative subset:\n",
815 subset_size,maybe_s(subset_size)); pr();
816 }
817
818 sf(ps,"order%d= ",groups_done); pr();
819 if (subset_size==0) print("none");
820 for (i=0; i<subset_size; i++) {
821 print(rag(loc2str(subset[i])));
822 if (i!=subset_size-1) print(" ");
823 if (i==0) order_first[groups_done-1]=subset[i];
824 else order_next[prev]=subset[i];
825 prev=subset[i]; /* lint warning is OK */
826 } nl();
827 unorder_first[groups_done-1]=NO_LOCUS;
828 } /* if group size */
829 } while (num_unlinked>0);
830
831 if (groups_done==0) print("No Linkage Groups Found.\n");
832 num_orders= groups_done;
833
834 } on_exit {
835 unarray(loci,int);
836 unarray(linkage_group,int);
837 unarray(subset,int);
838 relay_messages;
839 }
840 }
841