1 /******************************************************************************
2
3 # # ##### ##### #### # # ##### #### ####
4 ## # # # # # # ## ## # # # # #
5 # # # # # # # # ## # # # #### #
6 # # # ##### # # # # # # # ### #
7 # ## # # # # # # # # # # ### # #
8 # # # # ####### #### # # ##### #### ### ####
9
10 ******************************************************************************
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12 for Biomedical Research. All rights reserved. See READ.ME for license. */
13
14 #define INC_LIB
15 #define INC_SHELL
16 #define INC_MISC
17 #include "mapm.h"
18
19 /* internal procedures and variables */
20 bool try_marker();
21
22 /* ERROR KLUDGE Stuff */
23 #define ESTEPS 200
24 #define ESTART (-9.9)
25 #define ESTEP 0.1
26 int errs[ESTEPS], noterrs[ESTEPS], lod[ESTEPS];
27 int cum_not[ESTEPS], cum_err[ESTEPS];
28 real ratio[ESTEPS];
29
30
npt_cmds_init()31 void npt_cmds_init()
32 {
33 int i;
34 for (i=0; i<ESTEPS; i++) errs[i]=noterrs[i]=0;
35 }
36
37
make_map()38 command make_map()
39 {
40 MAP *map;
41 int num_loci;
42
43 map= NULL;
44 mapm_ready(ANY_DATA,2,MAYBE_PERM,&num_loci);
45 nomore_args(0);
46
47 run {
48 map=allocate_map(num_loci);
49 print(MAP_DIVIDER);
50 for_all_orders(seq, map) {
51 init_rec_fracs(map);
52 converge_to_map(map);
53 print_long_map(map,"Map:");
54 print(MAP_DIVIDER);
55 }
56 } on_exit {
57 free_map(map);
58 relay_messages;
59 }
60 }
61
62
draw_map()63 command draw_map()
64 {
65 MAP *map;
66 int num_loci;
67 char name[PATH_LENGTH+1];
68 FILE *fp=NULL;
69
70 /* ONE_ORDER only - really don't want this to try to create a
71 60 or more page PostScript file if someone inadvertantly executes
72 it with a compare type sequence of many permutations */
73
74 map= NULL;
75 mapm_ready(ANY_DATA,2,ONE_ORDER,&num_loci);
76 use_uncrunched_args();
77 get_arg(stoken,"map",name);
78 nomore_args(num_args);
79
80 run {
81 if (!make_filename(name,FORCE_EXTENSION,WRS(PS_EXT)))
82 { sf(ps,"illegal filename '%s'",name); error(ps); }
83 fp= open_file(name,WRITE);
84 sf(ps,"Drawing map in PostScript file '%s'... \n",name); pr();
85 map=allocate_map(num_loci);
86 get_one_order(seq,map);
87 init_rec_fracs(map);
88 converge_to_map(map);
89 print_ps_map(fp,map);
90 close_file(fp);
91 print("ok\n");
92
93 } on_exit {
94 if (msg==CANTOPEN) {
95 sf(ps,"Can't create output file '%s'",name); error(ps);
96 } else if (msg==CANTCLOSE) {
97 error("\nCan't close file - disk is full?");
98 } else {
99 free_map(map);
100 relay_messages;
101 }
102 }
103 }
104
105
genotypes()106 command genotypes()
107 {
108 MAP *map;
109 int num_loci;
110
111 map= NULL;
112 mapm_ready(F2_DATA,2,MAYBE_PERM,&num_loci);
113 nomore_args(0);
114
115 run {
116 map=allocate_map(num_loci);
117 print(MAP_DIVIDER); print("Genotypes:\n");
118 for_all_orders(seq, map) {
119 init_rec_fracs(map);
120 converge_to_map(map);
121 print_f2_map_genotypes(map,use_haplotypes,TRUE,0);
122 print(MAP_DIVIDER);
123 }
124 } on_exit {
125 free_map(map);
126 relay_messages;
127 }
128 }
129
130
131 #define COMPARE_NONE \
132 "no orders allowed by three-point analysis at threshold %.2lf\n"
133 #define BEST_THRESH "\nBest order%s at threshold %.2lf"
134 #define BEST_ORDERS "\nBest %d orders"
135 #define BEST_ORDER "\nBest order"
136 #define N_EXCLUDED " (%d excluded by three-point analysis)"
137
compare()138 command compare()
139 {
140 SAVED_LIST *list=NULL;
141 MAP *map;
142 int maps_to_save, num_to_print, loci_per_map, n, total, excluded, tried, i;
143 int *locus, num_loci, prev;
144 real input_num, threshold, best;
145
146 mapm_ready(ANY_DATA,2,PERM_SEQ,&num_loci);
147 get_arg(itoken,20,&maps_to_save);
148 get_arg(rtoken,0.0,&threshold);
149 if (maps_to_save<1 || maps_to_save>999) usage_error(1);
150 if (threshold<0.0 || threshold>100.0) usage_error(2);
151 if (threshold==0.0) threshold=VERY_UNLIKELY; else threshold=-threshold;
152 nomore_args(num_args);
153
154 run {
155 list=allocate_map_list(maps_to_save,num_loci,SORTED,&map);
156 array(locus,num_loci,int); /* NO CRUNCH?, #loci shouldn't change */
157 get_list_of_all_loci(seq,locus,&loci_per_map,num_loci);
158 num_orders=0; /* global - deletes all pre-existing orders */
159 for (i=0; i<raw.num_markers; i++) order_next[i]=NO_LOCUS;
160 if (use_three_pt)
161 setup_3pt_data(locus,loci_per_map,-three_pt_threshold);
162
163 total=excluded=tried=0; n=0;
164 for_all_orders(seq,map) total++;
165 for_all_orders(seq,map) {
166 keep_user_amused("map",++n,total);
167 if (use_three_pt &&
168 !three_pt_verify(map->locus,map->num_loci,three_pt_window))
169 { excluded++; continue; }
170 init_rec_fracs(map);
171 converge_to_map(map);
172 insert_map_into_list(list,&map);
173 tried++;
174 }
175 if (tried==0)
176 { sf(ps,COMPARE_NONE,three_pt_threshold); pr(); abort_command(); }
177
178 map=get_best_map(list); best=map->log_like; i=1;
179 if (threshold!=VERY_UNLIKELY) {/* can you say "abstraction violation" */
180 while (i<list->num_maps &&
181 list->map_list[i]->log_like>(best+threshold)) i++;
182 num_to_print=i;
183 sf(ps,BEST_THRESH,maybe_s(num_to_print),-threshold); pr();
184 } else {
185 num_to_print=min(maps_to_save,tried);
186 if (num_to_print>1) sf(ps,BEST_ORDERS,num_to_print);
187 else strcpy(ps,BEST_ORDER);
188 pr();
189 }
190 if (excluded>0) { sf(ps,N_EXCLUDED,excluded); pr(); }
191 print(":\n");
192 print_list(list,num_to_print);
193
194 print("order1 is set");
195 for (i=0; i<map->num_loci; i++) {
196 /* sf(ps,"%s ",rag(loc2str(map->locus[i]))); pr(); */
197 if (i==0) order_first[0]=map->locus[i];
198 else order_next[prev]= map->locus[i]; /* lint warning is OK */
199 prev=map->locus[i];
200 }
201 nl();
202 unorder_first[0]=NO_LOCUS; order_next[prev]=NO_LOCUS; num_orders=1;
203
204 } on_exit {
205 free_map_list(list);
206 relay_messages;
207 }
208 }
209
210
ripple()211 command ripple()
212 {
213 MAP *map0=NULL, *map;
214 SAVED_LIST *list;
215 int n_loci, window, excluded, tried, num_ok, i, j, k, n;
216 real thresh, best;
217 bool same;
218
219 mapm_ready(ANY_DATA,3,ONE_ORDER,&n_loci);
220 get_arg(itoken,5,&window); if (!irange(&window,3,99)) usage_error(1);
221 get_arg(rtoken,2.0,&thresh);if (!rrange(&thresh,0.0,100.0)) usage_error(1);
222 if (window>n_loci)
223 error("window size is greater than number of loci in seq");
224 if (has_fixed_dists(seq))
225 error("can not use sequence with fixed distances");
226 nomore_args(num_args);
227 for (i=window, n=1; i>1; i--) n*=i; /* window factorial */
228
229 run {
230 map0=allocate_map(n_loci);
231 list=allocate_map_list(n,n_loci,SORTED,&map);
232 get_one_order(seq,map0);
233 init_rec_fracs(map0);
234 print(MAP_DIVIDER);
235 converge_to_map(map0);
236 print_long_map(map0,"Map To Test:");
237 if (use_three_pt)
238 setup_3pt_data(map0->locus,map0->num_loci,-three_pt_threshold);
239
240 print(MAP_DIVIDER);
241 sf(ps,"Window-size: %d Log-likelihood Threshold: %.2lf\n",
242 window,thresh); pr();
243 print("Comparing maps with ALL flanking markers...\n\n");
244 for (i=0; i<=n_loci-window; i++) {
245 clean_list(list); map=get_map_to_bash(list);
246 make_compare_seq(map0->locus,map0->num_loci,i,window);
247
248 print("compare ");
249 if (i>0) print("...{"); else print(" {");
250 for (j=i; j<i+window; j++) {
251 print(rag(loc2str(map0->locus[j])));
252 if (j!=i+window-1) print(" ");
253 }
254 if (i+window-1<map0->num_loci-1) print("}... ");
255 else print("} ");
256 /* to_column(17+(print_names ? 10:5)*window); */
257 flush();
258
259 excluded=tried=0;
260 for_all_orders(seq,map) {
261 if (use_three_pt && /* only threept the compared part */
262 !three_pt_verify(map->locus+i,window,three_pt_window))
263 { excluded++; continue; }
264 init_rec_fracs(map);
265 converge_to_map(map);
266 insert_map_into_list(list,&map); tried++;
267 }
268 if (tried==0) { print("(all excluded)\n"); continue; }
269
270 same=TRUE; map=get_best_map(list);
271 for (j=0; j<map0->num_loci; j++)
272 if (map0->locus[j]!=map->locus[j]) { same=FALSE; break; }
273 best=map->log_like; k=1;
274 while (k<list->num_maps && /* thresh>0 here */
275 list->map_list[k]->log_like>(best-thresh)) k++;
276
277 if (!same || k>1) {
278 sf(ps,"\nbest order%s:",maybe_s(k)); pr();
279 if (excluded>0) { sf(ps," (%d excluded)",excluded); pr(); }
280 nl(); print_list(list,k); nl();
281 } else print("ok\n");
282 }
283 print(MAP_DIVIDER);
284
285 } on_exit {
286 free_map_list(list);
287 free_map(map0);
288 relay_messages;
289 }
290 }
291
292
293 #define TRY_NONE "%s - all orders excluded by three-point analysis\n"
294 #define TRY_TOO_MANY "too many loci to try"
295 #define MAX_PAIRED 20
296
try()297 command try()
298 {
299 SAVED_LIST **list=NULL;
300 MAP *map=NULL;
301 int *marker_to_try=NULL, **new_marker=NULL, num_to_try_at_once, num_tries;
302 int num_seq_loci, num_intervals, first_marker, i, j, n, m, next;
303 int **paired_loci=NULL, next_paired, num_paired, num_total, max_paired;
304 bool **exclude_interval=NULL, **zero_placement=NULL, *is_locus_paired=NULL;
305 char *err, token[TOKLEN+1], *intervals, *p;
306
307 void expand_seq_names(); /* KLUDGE from sequence.c, gotta do better */
308 char *markers, str[MAX_SEQ_LEN+99]; /* KLUDGE KLUDGE KLUDGE */
309
310 mapm_ready(ANY_DATA,2,MAYBE_PERM,&num_seq_loci);
311 strcpy(str,args); markers=str;
312 if (nullstr(args)) usage_error(0);
313 /* NEED SOMETHING LIKE PARSE_LOCUS_ARGS AND CRUNCH */
314
315 run {
316 array(marker_to_try,raw.num_markers,int); /* w/paired, num_total */
317 matrix(new_marker,raw.num_markers,MAX_PAIRED,int);
318
319 num_tries=0; num_total=0; max_paired=1;
320 for (i=0; i<raw.num_markers; i++) for (j=0; j<MAX_PAIRED; j++)
321 new_marker[i][j]=NO_LOCUS;
322
323 /* preprocess out the paired loci */
324 expand_seq_names(markers);
325 while (stoken(&markers,sREQUIRED,token)) {
326 if (streq(token,"[")) {
327 if (!stoken(&markers,sREQUIRED,token))
328 error("expected a locus after '['");
329 if (!is_a_locus(token,&n,&err) || !nullstr(err))
330 { sf(ps,"'%s' is not a locus",token); error(ps); }
331 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
332 next=0;
333 new_marker[num_tries][next++]=n;
334 marker_to_try[num_total++]=n;
335 while (stoken(&markers,sREQUIRED,token)) {
336 if (streq(token,"]")) break;
337 if (!is_a_locus(token,&m,&err) || !nullstr(err))
338 { sf(ps,"'%s' is not a locus",token); error(ps); }
339 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
340 marker_to_try[num_total++]=m;
341 if (next==MAX_PAIRED) error(TRY_TOO_MANY);
342 new_marker[num_tries][next++]=m;
343 }
344 if (!streq(token,"]")) error("missing ']'");
345 max_paired=max(next,max_paired);
346 num_tries++;
347
348 } else if (streq(token,"<")) {
349 if (!stoken(&markers,sREQUIRED,token))
350 error("expected a locus after '<'");
351 if (!is_a_locus(token,&n,&err) || !nullstr(err))
352 { sf(ps,"'%s' is not a locus",token); error(ps); }
353 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
354 next=0;
355 new_marker[num_tries][next++]=n;
356 marker_to_try[num_total++]=n;
357 while (stoken(&markers,sREQUIRED,token)) {
358 if (streq(token,">")) break;
359 if (!is_a_locus(token,&m,&err) || !nullstr(err))
360 { sf(ps,"'%s' is not a locus",token); error(ps); }
361 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
362 marker_to_try[num_total++]=m;
363 if (next==MAX_PAIRED) error(TRY_TOO_MANY);
364 new_marker[num_tries][next++]=m;
365 }
366 if (!streq(token,">")) error("missing '>'");
367 if (next>1) { /* then it's paired */
368 for (i=0; i<next; i++)
369 new_marker[num_tries+1][i]=
370 new_marker[num_tries][next-i-1];
371 max_paired=max(next,max_paired);
372 num_tries+=2;
373 } else num_tries++;
374
375 } else {
376 if (!is_a_locus(token,&n,&err) || !nullstr(err))
377 { sf(ps,"'%s' is not a locus",token); error(ps); }
378 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
379 new_marker[num_tries][0]=n;
380 marker_to_try[num_total++]=n;
381 num_tries++;
382 }
383 }
384
385 /* test some limits here?? */
386 num_intervals= num_seq_loci+1;
387 num_to_try_at_once= min((print_names ? 6:8),num_tries);
388 matrix(exclude_interval,num_to_try_at_once,num_intervals,bool);
389 matrix(zero_placement,num_to_try_at_once,num_intervals,bool);
390 map= allocate_map(num_seq_loci);
391 array(list,num_to_try_at_once,SAVED_LIST*);
392 for (i=0; i<num_to_try_at_once; i++)
393 list[i]= allocate_map_list(num_intervals+1,num_seq_loci+max_paired,
394 UNSORTED,NULL);
395
396 /* list all loci in seq and args in map->locus, then setup 3pt */
397 get_one_order(seq,map);
398 init_rec_fracs(map); /* no ctm */
399 if (use_three_pt) {
400 for (j=0; j<map->num_loci; j++) {
401 if (num_total==raw.num_markers) error(TRY_TOO_MANY);
402 marker_to_try[num_total++]=map->locus[j];
403 }
404 setup_3pt_data(marker_to_try,num_total,-three_pt_threshold);
405 }
406
407 /* Here we loop over 8 markers to try at a time, calculating the
408 8 map_lists and printing the results. Each map_list has the
409 results of sticking one locus into all selected intervals of
410 one order. Note that i counts 0..7 and j counts from n...n+7,
411 where n is the 1st-marker of this set of 8 to try. */
412
413 n=0;
414 for_all_orders(seq,map) {
415 for (first_marker=0; first_marker<num_tries;
416 first_marker+=num_to_try_at_once) {
417 /* For each 8 markers to try... */
418 for (i=0, j=first_marker; j<num_tries && i<num_to_try_at_once;
419 j++, i++) {
420 if (!try_marker(new_marker[j],map,list[i],
421 exclude_interval[i],zero_placement[i],&n))
422 { sf(ps,TRY_NONE,rag(loc2str(marker_to_try[j]))); pr(); }
423 }
424 nl();
425 print_trys(list,map,exclude_interval,new_marker,i,
426 first_marker);
427 nl(); n=0;
428 for (i=0; i<num_to_try_at_once; i++) clean_list(list[i]);
429 }
430 }
431
432 } on_exit {
433 unarray(marker_to_try,int);
434 unarray(new_marker,int);
435 unmatrix(exclude_interval,num_to_try_at_once,bool);
436 unmatrix(zero_placement,num_to_try_at_once,bool);
437 free_map(map);
438 if (list!=NULL) {
439 for (i=0; i<num_to_try_at_once; i++) free_map_list(list[i]);
440 unarray(list,SAVED_LIST*);
441 }
442 relay_messages;
443 }
444 }
445
446
447 /* really should replace this all with npt_exclusions, or some such thing */
448 #define is_zero(rec_frac,sex) \
449 (rec_frac[MALE]<ZERO_DIST && (!sex || rec_frac[FEMALE]<ZERO_DIST))
450
try_marker(marker,original_map,list,excluded,zero,count)451 bool try_marker(marker,original_map,list,excluded,zero,count)
452 int *marker;
453 MAP *original_map; /* contains the list of loci in the order */
454 SAVED_LIST *list;
455 bool *excluded, *zero;
456 int *count;
457 {
458 int i, j, num_loci, next, num_ok, last, sex, loc, num_paired, best_i;
459 MAP *map;
460 real best;
461
462 clean_list(list);
463 map=get_map_to_bash(list);
464 original_map->unlink= NONE_UNLINKED;
465
466 for (i=0; i<original_map->num_loci; i++) excluded[i]=FALSE;
467 num_ok= original_map->num_loci+1;
468 for (num_paired=0; marker[num_paired]!=NO_LOCUS; num_paired++)
469 if (use_three_pt)
470 num_ok=three_pt_exclusions(original_map->locus,original_map->num_loci,
471 marker[num_paired],excluded);
472 if (num_paired==0) send(CRASH);
473
474 last=original_map->num_loci; best=VERY_UNLIKELY;
475 for (i=0; i<=last; i++) if (!excluded[i]) {
476 mapcpy(map,original_map,TRUE);
477 for (j=0; j<num_paired; j++)
478 if (!insert_locus(map,i+j,marker[j])) send(CRASH);
479 keep_user_amused("map",++*count,0);
480 init_rec_fracs(map);
481 converge_to_map(map);
482 if (map->log_like>best) { best=map->log_like; best_i=i; }
483
484 /* find zero placements. indexing is:
485 orig-loci: 0 1 | 0 1 | 0 1
486 intervals: 0 1 2 | 0 1 2 | 0 1 2
487 new-seq: x 0 1 | 0 x 1 | 0 1 x
488 recfracs: 0 1 | 0 1 | 0 1
489 index: i=0 | i=1 | i=2 */
490 sex=map->sex_specific;
491 if (i>0 && is_zero(map->rec_frac[i-1],sex)) zero[i]=TRUE;
492 if (i<last && is_zero(map->rec_frac[i+num_paired-1],sex)) zero[i]=TRUE;
493 if (zero[i]) for (j=1; j<num_paired; j++)
494 if (i+j<last && !is_zero(map->rec_frac[i+j],sex)) zero[i]=FALSE;
495 insert_map_into_list(list,&map);
496 } else keep_user_amused("map",++*count,0);
497
498 if (num_ok>0 && zero[best_i]) {
499 for (i=best_i+1; !excluded[i] && i<=last &&
500 list->map_list[i]->log_like-best>ZERO_PLACE && zero[i]; i++)
501 list->map_list[i]->log_like=ZERO_LIKE;
502 for (i=best_i-1; !excluded[i] && i>=0 &&
503 list->map_list[i]->log_like-best>ZERO_PLACE && zero[i]; i--)
504 list->map_list[i]->log_like=ZERO_LIKE;
505 }
506 /* INF dist */
507 mapcpy(map,original_map,TRUE); /* cleans map */
508 i=map->num_loci-1; /* interval to unlink */
509 for (j=0; j<num_paired; j++)
510 if (!insert_locus(map,map->num_loci+j,marker[j])) send(CRASH);
511 keep_user_amused("map",++*count,0);
512 map->rec_frac[i][MALE]= UNLINK_ME;
513 map->rec_frac[i][FEMALE]=UNLINK_ME;
514 init_rec_fracs(map);
515 converge_to_map(map);
516 insert_map_into_list(list,&map);
517
518 return(num_ok==0 ? FALSE:TRUE);
519 }
520
521
522
523 #define ORDER_NO_GROUPS \
524 "\nNo linkage groups found at specified threshold.\n"
525 #define ORDER_AT "Placing at log-likelihood threshold %.2lf...\n"
526 #define START_CRITERIA \
527 "Starting Orders: Size %d, Log-Likelihood %.2lf, Searching up to %d subsets\n"
528 #define INF_CRITERIA \
529 "Informativeness: min #Individuals %d%s, min Distance %s\n"
530 #define LINK_CRITERIA \
531 "Linkage Groups at min LOD %.2lf, max Distance %s\n"
532
order_maker()533 command order_maker()
534 {
535 int *loci=NULL, num_loci, *linkage_group=NULL, num_unlinked, group_size;
536 int starter[3], *subset=NULL, **seed_temp=NULL, subset_size, groups_done;
537 int i, j, k, num, prev, num_to_place, num_unplaced, seed_size, seed_tries;
538 bool seed_ok, found;
539 MAP *order=NULL, *seed_map=NULL;
540 PLACEME **unplaced;
541 real lodbound, thetabound, seed_like;
542
543 /* args: lod theta start-window start-like max-to-try */
544 mapm_ready(F2_DATA,3,LIST_SEQ,&num_loci); /* F2 because #indivs */
545 if (!rtoken(&args,default_lod,&lodbound) || !rrange(&lodbound,0.0,1000.0))
546 usage_error(1);
547 if (!rtoken(&args,default_theta,&thetabound) || !input_dist(&thetabound))
548 usage_error(2);
549 get_arg(itoken,5,&seed_size); if (seed_size<3) usage_error(0);
550 get_arg(rtoken,3.0,&seed_like); if (seed_like<=0.0) usage_error(0);
551 get_arg(itoken,50,&seed_tries); if (seed_tries<1) usage_error(0);
552 nomore_args(0);
553 seed_like= -seed_like;
554
555 sf(ps,LINK_CRITERIA,lodbound,rag(rf2str(thetabound))); pr();
556 sf(ps,START_CRITERIA,seed_size,-seed_like,seed_tries); pr();
557
558 sf(ps,INF_CRITERIA,npt_min_indivs,
559 (npt_codominant ? " (codominant only)":""),
560 rag(rf2str(npt_min_theta))); pr();
561 if (npt_threshold==npt_first_threshold) /* globals */
562 sf(ps,"Placement Threshold %.2lf, Npt-Window %d\n",npt_threshold,
563 npt_window);
564 else
565 sf(ps,"Placement Threshold-1 %.2lf, Threshold-2 %.2lf, Npt-Window %d\n",
566 npt_first_threshold,npt_threshold,npt_window);
567 pr();
568
569 run {
570 alloc_list_of_all_loci(seq,&loci,&num_loci);
571 array(linkage_group,num_loci,int);
572 order=allocate_map(MAX_MAP_LOCI);
573 seed_map=allocate_map(seed_size);
574 array(subset,num_loci,int);
575 matrix(seed_temp,seed_tries+1,num_loci,int);
576 parray(unplaced,num_loci,PLACEME);
577 for (i=0; i<num_loci; i++) {
578 array(unplaced[i]->excluded,MAX_MAP_LOCI+1,bool);
579 unplaced[i]->best_map=allocate_map(MAX_MAP_LOCI);
580 }
581
582 num_unlinked=num_loci;
583 num_orders=0; /* global - deletes all pre-existing orders */
584 for (i=0; i<raw.num_markers; i++) order_next[i]=NO_LOCUS;
585
586 groups_done=0;
587 do {
588 get_linkage_group(loci,&num_unlinked,linkage_group,&group_size,
589 lodbound,thetabound);
590 if (group_size>=2) {
591 print(MAP_DIVIDER);
592 inv_isort(linkage_group,group_size);
593 groups_done++;
594 sf(ps,"Linkage group %d, %d Markers:\n",groups_done,
595 group_size); pr();
596 for (i=0; i<group_size; i++) {
597 num=linkage_group[i];
598 sf(ps,"%4d %s%s",num+1,raw.locus_name[num],
599 (use_haplotypes && haplotyped(num) ? "+":""));
600 pad_to_len(ps,14); pr();
601 if (i==group_size-1 || i%5==4) nl(); else print(" ");
602 }
603 nl();
604
605 if (group_size<3) {
606 print("Group is too small to map.\n");
607 sf(ps,"order%d= ",groups_done); pr();
608 for (i=0; i<group_size; i++) {
609 sf(ps,"%s ",rag(loc2str(linkage_group[i]))); pr();
610 if (i==0) order_first[groups_done-1]=linkage_group[i];
611 else order_next[prev]=linkage_group[i];
612 prev=linkage_group[i]; /* lint warning is OK */
613 }
614 nl();
615 num_orders++;
616 continue;
617 }
618
619 if (use_three_pt)
620 setup_3pt_data(linkage_group,group_size,-three_pt_threshold);
621 seed_ok=FALSE;
622 informative_subset(linkage_group,group_size,
623 npt_min_indivs,npt_min_theta,npt_codominant,
624 use_haplotypes,subset,&subset_size);
625
626 if (subset_size<3 || subset_size<seed_size)
627 print("Most informative subset is too small... \n");
628 else if (subset_size==group_size)
629 print("All markers are informative... \n");
630 else {
631 print("Most informative subset: ");
632 for (i=0; i<subset_size; i++) {
633 sf(ps,"%d%s",(subset[i])+1,
634 (use_haplotypes && haplotyped(subset[i]) ? "+":""));
635 pr(); if (i<subset_size-1) print(" ");
636 }
637 nl();
638 if (find_seed_order(TRUE,subset,subset_size,seed_size,
639 seed_tries,seed_like,order,seed_map,
640 seed_temp))
641 seed_ok=TRUE;
642 else if (group_size==subset_size) continue; /* punt LG */
643 }
644
645 if (!seed_ok) {
646 if (find_seed_order(FALSE,linkage_group,group_size,
647 seed_size,seed_tries,seed_like,order,
648 seed_map,seed_temp))
649 seed_ok=TRUE;
650 else continue; /* punt this LG */
651 }
652
653 /* delete seed markers from linkage group */
654 num_unplaced=0;
655 for (j=0; j<group_size; j++) {
656 for (i=0, found=FALSE; i<order->num_loci; i++)
657 if (order->locus[i]==linkage_group[j])
658 { found=TRUE; break; }
659 if (!found)
660 unplaced[num_unplaced++]->locus=linkage_group[j];
661 }
662
663 if (num_unplaced>0) {
664 /* extend_order is chatty, prints map and places at end */
665 nl(); sf(ps,ORDER_AT,npt_first_threshold); pr();
666 extend_order(order,unplaced,&num_unplaced,
667 -npt_first_threshold,TRUE);
668
669 if (npt_first_threshold!=npt_threshold && num_unplaced>0) {
670 print(SUB_DIVIDER);
671 sf(ps,ORDER_AT,npt_threshold); pr();
672 extend_order(order,unplaced,&num_unplaced,
673 -npt_threshold,FALSE);
674 }
675 }
676
677 nl();
678 sf(ps,"order%d= ",groups_done); pr();
679 for (i=0; i<order->num_loci; i++) {
680 sf(ps,"%s ",rag(loc2str(order->locus[i]))); pr();
681 if (i==0) order_first[groups_done-1]=order->locus[i];
682 else order_next[prev]=order->locus[i];
683 prev=order->locus[i]; /* lint warning is OK */
684 }
685 nl();
686 sf(ps,"other%d= ",groups_done); pr();
687 for (i=0; i<num_unplaced; i++) {
688 sf(ps,"%s ",rag(loc2str(unplaced[i]->locus))); pr();
689 if (i==0) unorder_first[groups_done-1]=unplaced[i]->locus;
690 else order_next[prev]=unplaced[i]->locus;
691 prev=unplaced[i]->locus; /* lint warning is OK */
692 }
693 nl();
694 num_orders++;
695
696 if (print_all_maps) for (i=0; i<num_unplaced; i++)
697 if (unplaced[i]->best_map->num_loci>0) {
698 print(SUB_DIVIDER);
699 sf(ps,"Best placement of %s:",
700 rag(loc2str(unplaced[i]->locus)));
701 print_special_map(unplaced[i]->best_map,ps,
702 order->num_loci,order->locus);
703 }
704
705 } /* if group size */
706 } while (num_unlinked>0);
707
708 if (groups_done==0) print(ORDER_NO_GROUPS);
709 else print(MAP_DIVIDER);
710
711 } on_exit {
712 unarray(loci,int);
713 unarray(linkage_group,int);
714 free_map(order);
715 free_map(seed_map);
716 unarray(subset,int);
717 unmatrix(seed_temp,seed_tries,int);
718 for (i=0; i<num_loci; i++) {
719 unarray(unplaced[i]->excluded,bool);
720 free_map(unplaced[i]->best_map);
721 }
722 unparray(unplaced,num_loci,PLACEME);
723 relay_messages;
724 }
725 }
726
727
greedy()728 command greedy()
729 {
730 int *locus=NULL, num_loci, num_order, *all_loci, total;
731 int i, prev, num, num_unplaced;
732 MAP *order=NULL;
733 PLACEME **unplaced;
734
735 /* args: lod theta start-window start-like max-to-try */
736 mapm_ready(F2_DATA,2,LIST_SEQ,&num_order); /* F2 because place_locus */
737 if (nullstr(args)) usage_error(0);
738 parse_locus_args(&locus,&num_loci); /* error if fails, >=1 */
739 crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_ARGS);
740
741 if (npt_threshold==npt_first_threshold) /* globals */
742 sf(ps,"Placement Threshold %.2lf, Npt-Window %d\n",
743 npt_threshold,npt_window);
744 else
745 sf(ps,"Placement Threshold-1 %.2lf, Threshold-2 %.2lf, Npt-Window %d\n",
746 npt_first_threshold,npt_threshold,npt_window);
747 pr();
748
749 if (num_order>=MAX_MAP_LOCI) error("starting order is too big");
750 run {
751 total= num_order+num_loci;
752 order=allocate_map(total); /* maybe MAX_MAP */
753 get_list_of_all_loci(seq,order->locus,&order->num_loci,total);
754 array(all_loci,total,int);
755 parray(unplaced,num_loci,PLACEME);
756 for (i=0; i<num_loci; i++) {
757 array(unplaced[i]->excluded,total+1,bool);
758 unplaced[i]->best_map=allocate_map(total);
759 }
760
761 num_orders=0; /* global - deletes all pre-existing orders */
762 for (i=0; i<raw.num_markers; i++) order_next[i]=NO_LOCUS;
763 print(MAP_DIVIDER);
764
765 total=0;
766 for (i=0; i<order->num_loci; i++) all_loci[total++]=order->locus[i];
767 for (i=0; i<num_loci; i++) all_loci[total++]=locus[i];
768 sort_loci(all_loci,total);
769 if (use_three_pt)
770 setup_3pt_data(all_loci,total,-three_pt_threshold);
771
772 for (i=0; i<num_loci; i++) unplaced[i]->locus=locus[i];
773 num_unplaced=num_loci;
774
775 sf(ps,"%d Markers to order:\n",total); pr();
776 for (i=0; i<total; i++) {
777 num=all_loci[i];
778 sf(ps,"%4d %s%s",num+1,raw.locus_name[num],
779 (use_haplotypes && haplotyped(num) ? "+":""));
780 pad_to_len(ps,14); pr();
781 if (i==total-1 || i%5==4) nl(); else print(" ");
782 }
783
784 nl(); sf(ps,ORDER_AT,npt_first_threshold); pr();
785 extend_order(order,unplaced,&num_unplaced,-npt_first_threshold,TRUE);
786
787 if (npt_first_threshold!=npt_threshold && num_unplaced>0) {
788 print(SUB_DIVIDER);
789 sf(ps,ORDER_AT,npt_threshold); pr();
790 extend_order(order,unplaced,&num_unplaced,-npt_threshold,FALSE);
791 }
792 nl();
793 print("order1= ");
794 for (i=0; i<order->num_loci; i++) {
795 sf(ps,"%s ",rag(loc2str(order->locus[i]))); pr();
796 if (i==0) order_first[0]=order->locus[i];
797 else order_next[prev]=order->locus[i];
798 prev=order->locus[i]; /* lint warning is OK */
799 }
800 nl();
801 print("other1= ");
802 for (i=0; i<num_unplaced; i++) {
803 sf(ps,"%s ",rag(loc2str(unplaced[i]->locus))); pr();
804 if (i==0) unorder_first[0]=unplaced[i]->locus;
805 else order_next[prev]=unplaced[i]->locus;
806 prev=unplaced[i]->locus;
807 }
808 nl();
809 num_orders=1;
810
811 if (print_all_maps) for (i=0; i<num_unplaced; i++)
812 if (unplaced[i]->best_map->num_loci>0) {
813 print(SUB_DIVIDER);
814 sf(ps,"Best placement of %s:",rag(loc2str(unplaced[i]->locus)));
815 print_special_map(unplaced[i]->best_map,ps,
816 order->num_loci,order->locus);
817 }
818 print(MAP_DIVIDER);
819
820 } on_exit {
821 unarray(locus,int);
822 unarray(all_loci,int);
823 free_map(order);
824 for (i=0; i<num_loci; i++) {
825 unarray(unplaced[i]->excluded,bool);
826 free_map(unplaced[i]->best_map);
827 }
828 unparray(unplaced,num_loci,PLACEME);
829 relay_messages;
830 }
831 }
832