1 /******************************************************************************
2
3 # # # ###### #### ####
4 # ## # # # # # #
5 # # # # ##### # # #
6 # # # # # # # ### #
7 # # ## # # # ### # #
8 # # # # #### ### ####
9
10 ******************************************************************************/
11 /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute
12 for Biomedical Research. All rights reserved. See READ.ME for license. */
13
14 #define INC_LIB
15 #define INC_SHELL
16 #include "mapm.h"
17
18 /* global vars which happen to be declared here */
19 char **note;
20 real *error_rate;
21 int *modified;
22 int *my_group, num_groups;
23 int *haplo_next, *haplo_first; /* [#loci] */
24 int *order_first, *unorder_first, *order_next, num_orders; /* [#loci] */
25 int *class;
26 char **class_name;
27
28 bool two_pt_touched;
29 bool three_pt_touched;
30
31 /* internal for 2pt */
32 TWO_PT_DATA ***two_pt_data=NULL;
33 int two_pt_allocated=0, two_pt_used=0, two_pt_max=0, two_pt_collect=0;
34 TWO_PT_DATA **two_pt_list=NULL;
35 TWO_PT_DATA *UNLINKED_TWO_PT, the_unlinked_two_pt;
36 TWO_PT_DATA *get_next_two_pt_entry();
37
38 /* internal for 3pt */
39 typedef struct three_entry {
40 int locus2, locus3;
41 float delta1, delta2, delta3;
42 struct three_entry *next, *next_same2;
43 } TRIPLE_LIST;
44
45 typedef struct {
46 struct three_entry *entry, *unused;
47 } THREE_STRUCT;
48
49 THREE_STRUCT *three_pt_data;
50 TRIPLE_LIST *allocate_three_entries();
51 void deallocate_triple_list();
52 int num_threes_allocated, num_threes_deallocated; /* for testing purposes */
53
54 void put_next();
55 bool replace_triple();
56 bool restore_triple();
57 void save_triple();
58 bool same_2_loop();
59
60 #define UNFILLED 9999.0
61
62
63 /**************** Stuff to handle global 2pt data ****************/
64
allocate_two_pt(num_loci)65 void allocate_two_pt(num_loci)
66 int num_loci;
67 {
68 int i,j;
69
70 two_pt_data= NULL;
71 two_pt_list= NULL;
72 two_pt_max=0;
73
74 array(two_pt_data,num_loci,TWO_PT_DATA**);
75 for (i=0; i<num_loci; i++) {
76 array(two_pt_data[i],(i+1),TWO_PT_DATA*);
77 for (j=0; j<=i; j++) two_pt_data[i][j]=NULL;
78 two_pt_max+=i+1;
79 }
80 array(two_pt_list,two_pt_max,TWO_PT_DATA*);
81
82 UNLINKED_TWO_PT= &the_unlinked_two_pt;
83 UNLINKED_TWO_PT->lodscore[NOSEX]= UNLINKED_LOD;
84 UNLINKED_TWO_PT->lodscore[SEXSPEC]= UNLINKED_LOD;
85 UNLINKED_TWO_PT->theta[NOSEX]= UNLINKED_THETA;
86 UNLINKED_TWO_PT->theta[MALE]= UNLINKED_THETA;
87 UNLINKED_TWO_PT->theta[FEMALE]= UNLINKED_THETA;
88
89 two_pt_used= two_pt_allocated= two_pt_collect= 0;
90 two_pt_touched= FALSE;
91 }
92
93
free_two_pt(num_loci)94 void free_two_pt(num_loci)
95 int num_loci;
96 {
97 int i;
98 if (two_pt_data==NULL) return;
99
100 two_pt_touched=FALSE;
101 for (i=0; i<two_pt_allocated; i++)
102 { unsingle(two_pt_list[i],TWO_PT_DATA); }
103 unarray(two_pt_list,TWO_PT_DATA*);
104
105 for (i=0; i<num_loci; i++)
106 { unarray(two_pt_data[i],TWO_PT_DATA*); }
107 unarray(two_pt_data,TWO_PT_DATA**);
108 }
109
110
get_next_two_pt_entry(a,b,used_needs_incrementing)111 TWO_PT_DATA *get_next_two_pt_entry(a,b,used_needs_incrementing)
112 int a, b;
113 bool *used_needs_incrementing;
114 {
115 int i;
116 TWO_PT_DATA *p;
117
118 if (two_pt_data[a][b]!=NULL && two_pt_data[a][b]!=UNLINKED_TWO_PT) {
119 p=two_pt_data[a][b];
120 p->used=FALSE;
121 two_pt_data[a][b]=NULL;
122 two_pt_collect=0;
123 *used_needs_incrementing=FALSE;
124 return(p);
125 }
126
127 if (two_pt_used==two_pt_allocated) {
128 while (two_pt_collect<two_pt_used)
129 if (!two_pt_list[two_pt_collect]->used) {
130 *used_needs_incrementing=FALSE;
131 return(two_pt_list[two_pt_collect]);
132 } else two_pt_collect++;
133 /* none free if we fall to here */
134 expand_two_pt(EXPAND_DEFAULT);
135 }
136 if (two_pt_used>=two_pt_allocated || two_pt_list[two_pt_used]==NULL)
137 send(CRASH);
138
139 *used_needs_incrementing=TRUE;
140 return(two_pt_list[two_pt_used]); /* note NOT incremented yet! */
141 }
142
143
expand_two_pt(num_entries)144 void expand_two_pt(num_entries)
145 int num_entries;
146 {
147 int i;
148
149 if (num_entries==EXPAND_DEFAULT)
150 num_entries=INIT_2PT_SIZE(raw.num_markers);
151
152 for (i=0; i<num_entries; i++) {
153 if (two_pt_allocated>=two_pt_max) break;
154 single(two_pt_list[two_pt_allocated],TWO_PT_DATA);
155 two_pt_list[two_pt_allocated]->used=FALSE;
156 two_pt_allocated++;
157 }
158
159 }
160
161
162 /* The following procedures are the only external accessors to the
163 TWO_PT_DATA structure. */
164
compute_two_pt(a,b)165 void compute_two_pt(a,b) /* internal use only */
166 int a, b;
167 {
168 TWO_PT_DATA *two_pt;
169 int temp;
170 bool inc;
171
172 if (a<b) { temp=a; a=b; b=temp; }
173 two_pt= get_next_two_pt_entry(a,b,&inc);
174 quick_two_pt(a,b,two_pt,FALSE);
175 if (raw.data_type==CEPH) quick_two_pt(a,b,two_pt,TRUE); /* also w/sex */
176
177 if (two_pt->lodscore[NOSEX]<=UNLINKED_LOD &&
178 two_pt->theta[NOSEX]>=UNLINKED_THETA &&
179 (raw.data_type!=CEPH ||
180 (two_pt->lodscore[SEXSPEC]<=UNLINKED_LOD &&
181 two_pt->theta[MALE]>=UNLINKED_THETA &&
182 two_pt->lodscore[FEMALE]>=UNLINKED_THETA))) {
183 two_pt_data[a][b]=UNLINKED_TWO_PT;
184 } else {
185 two_pt->used=TRUE;
186 two_pt_data[a][b]=two_pt;
187 if (inc) two_pt_used++;
188 }
189 two_pt_touched=TRUE;
190 }
191
192
get_two_pt(a,b,lod,theta)193 bool get_two_pt(a,b,lod,theta) /* TRUE if not UNLINKED_TWO_PT */
194 int a, b;
195 real *lod, *theta;
196 {
197 int temp;
198
199 if (a<b) { temp=a; a=b; b=temp; }
200 if (two_pt_data[a][b]==NULL) compute_two_pt(a,b);
201
202 if (lod!=NULL) *lod= two_pt_data[a][b]->lodscore[NOSEX];
203 if (theta!=NULL) *theta=two_pt_data[a][b]->theta[NOSEX];
204 return(two_pt_data[a][b]!=UNLINKED_TWO_PT);
205 }
206
207
get_sex_two_pt(a,b,lod,thetam,thetaf)208 bool get_sex_two_pt(a,b,lod,thetam,thetaf) /* TRUE if not UNLINKED_TWO_PT */
209 int a, b;
210 real *lod, *thetam, *thetaf;
211 {
212 int temp;
213
214 if (a<b) { temp=a; a=b; b=temp; }
215 if (two_pt_data[a][b]==NULL) compute_two_pt(a,b);
216
217 if (lod!=NULL) *lod= two_pt_data[a][b]->lodscore[SEXSPEC];
218 if (thetam!=NULL) *thetam= two_pt_data[a][b]->theta[MALE];
219 if (thetaf!=NULL) *thetaf= two_pt_data[a][b]->theta[FEMALE];
220 return(two_pt_data[a][b]!=UNLINKED_TWO_PT);
221 }
222
223
224
225 /**************** Stuff for Global 3pt data ****************/
226
allocate_three_pt(num_total)227 void allocate_three_pt(num_total)
228 int num_total;
229 {
230 int i;
231
232 run {
233 if (three_pt_data!=NULL) free_three_pt(raw.num_markers);
234 num_threes_allocated=0;
235 three_pt_touched=FALSE;
236 single(three_pt_data,THREE_STRUCT);
237 three_pt_data->entry=NULL;
238 three_pt_data->unused=NULL;
239 array(three_pt_data->entry,num_total,TRIPLE_LIST);
240 for (i=0; i<num_total; i++) {
241 three_pt_data->entry[i].next= NULL;
242 three_pt_data->entry[i].next_same2= NULL;
243 three_pt_data->entry[i].locus2= NO_LOCUS;
244 three_pt_data->entry[i].locus3= NO_LOCUS;
245 three_pt_data->entry[i].delta1= UNFILLED;
246 three_pt_data->entry[i].delta2= UNFILLED;
247 three_pt_data->entry[i].delta3= UNFILLED;
248 }
249 three_pt_data->unused=
250 allocate_three_entries(INIT_3PT_SIZE(num_total));
251 } when_aborting { free_three_pt(num_total); relay; }
252 }
253
allocate_three_entries(num_to_alloc)254 TRIPLE_LIST *allocate_three_entries(num_to_alloc)
255 int num_to_alloc;
256 {
257 TRIPLE_LIST *part;
258
259 if (num_to_alloc == 0) return(NULL);
260 single(part, TRIPLE_LIST);
261 num_threes_allocated++;
262 part->next= NULL;
263 part->next_same2= NULL;
264 part->locus2= NO_LOCUS;
265 part->locus3= NO_LOCUS;
266 part->delta1= UNFILLED;
267 part->delta2= UNFILLED;
268 part->delta3= UNFILLED;
269 part->next= allocate_three_entries(num_to_alloc-1);
270 part->next_same2= part->next;
271 return(part);
272 }
273
274
bash_all_three_pt(num_total)275 void bash_all_three_pt(num_total)
276 int num_total;
277 {
278 free_three_pt(num_total);
279 allocate_three_pt(num_total);
280 }
281
282
283
free_three_pt(num_total)284 void free_three_pt(num_total)
285 int num_total;
286 {
287 int i;
288
289 num_threes_deallocated=0; /* for testing purposes */
290
291 for (i=0; i<num_total; i++) {
292 if (three_pt_data->entry[i].next != NULL)
293 deallocate_triple_list(three_pt_data->entry[i].next);
294 }
295 unarray(three_pt_data->entry, TRIPLE_LIST);
296 deallocate_triple_list(three_pt_data->unused);
297 unsingle(three_pt_data, THREE_STRUCT);
298 three_pt_touched=FALSE;
299 three_pt_data= NULL;
300
301 /* sf(ps,"3pt stats - allocated: %d, deallocated: %d\n",
302 num_threes_allocated, num_threes_deallocated); pr(); */
303 }
304
deallocate_triple_list(p)305 void deallocate_triple_list(p)
306 TRIPLE_LIST *p;
307 {
308 if (p->next != NULL) deallocate_triple_list(p->next);
309 unsingle(p, TRIPLE_LIST);
310 num_threes_deallocated++;
311 }
312
313
314 /* insert_triple() arranges loci and likelihoods in numeric order (loci)
315 and calls save_triple() to store the data in the global three_pt_data */
316
insert_triple(loc1,loc2,loc3,d1,d2,d3)317 void insert_triple(loc1,loc2,loc3,d1,d2,d3)
318 int loc1,loc2,loc3;
319 real d1,d2,d3;
320 {
321 /* d1 - 123, d2 - 132, d3= 213 */
322 int temploc;
323 real tempreal;
324
325 if (loc2 < loc1) {
326 temploc= loc2; tempreal= d2;
327 loc2= loc1; d2= d1;
328 loc1= temploc; d1= tempreal;
329 }
330 if (loc3 < loc2) {
331 temploc= loc3; tempreal= d3;
332 loc3= loc2; d3= d2;
333 loc2= temploc; d2= tempreal;
334 if (loc2 < loc1) {
335 temploc= loc2; tempreal= d2;
336 loc2= loc1; d2= d1;
337 loc1= temploc; d1= tempreal;
338 }
339 }
340 save_triple(loc1,loc2,loc3,d1,d2,d3);
341 }
342
343
save_triple(locus1,locus2,locus3,d1,d2,d3)344 void save_triple(locus1,locus2,locus3,d1,d2,d3)
345 int locus1,locus2,locus3;
346 real d1,d2,d3;
347 {
348 TRIPLE_LIST *t, *p;
349 real r1,r2,r3;
350
351 if (three_pt_data->entry[locus1].locus2 == NO_LOCUS) {
352 three_pt_data->entry[locus1].locus2= (short)locus2;
353 three_pt_data->entry[locus1].locus3= (short)locus3;
354 three_pt_data->entry[locus1].delta1= (float)d1;
355 three_pt_data->entry[locus1].delta2= (float)d2;
356 three_pt_data->entry[locus1].delta3= (float)d3;
357
358 } else if (get_triple(locus1,locus2,locus3,&r1,&r2,&r3)) {
359 /* over-write current entry */
360 replace_triple(locus1,locus2,locus3,d1,d2,d3);
361
362 } else { /* must get blank from unused pile */
363 if (three_pt_data->unused==NULL)
364 three_pt_data->unused=
365 allocate_three_entries(INIT_3PT_SIZE(raw.num_markers));
366
367 t= three_pt_data->unused;
368 three_pt_data->unused= three_pt_data->unused->next;
369 t->next= NULL; t->next_same2= NULL;
370 t->locus2= (short)locus2; t->locus3= (short)locus3;
371 t->delta1=(float)d1; t->delta2=(float)d2; t->delta3=(float)d3;
372
373 if (three_pt_data->entry[locus1].next == NULL) {
374 three_pt_data->entry[locus1].next= t; /* places t in linked list */
375 if (three_pt_data->entry[locus1].locus2 == locus2)
376 three_pt_data->entry[locus1].next_same2= t;
377
378 } else {
379 for (p=three_pt_data->entry[locus1].next; p->next!=NULL;
380 p=p->next) {}
381 p->next= t; /* places t on the end of the linked list ->next */
382
383 /* The following stuff creates the ->next_same2 linked list
384 which separately links all triple whose first two markers
385 are the same. This aids in the search algorithm in get_triple(),
386 speeding it up by about a factor of 20 */
387
388 if (three_pt_data->entry[locus1].locus2 == locus2) {
389 if (three_pt_data->entry[locus1].next_same2 == NULL)
390 three_pt_data->entry[locus1].next_same2= t;
391 else {
392 for (p=three_pt_data->entry[locus1].next_same2;
393 p->next_same2!=NULL; p=p->next_same2) {}
394 p->next_same2= t;
395 }
396
397 } else {
398 for (p=three_pt_data->entry[locus1].next; p->locus2!=locus2;
399 p=p->next) {}
400 if (p != t) {
401 for (p; p->next_same2 != NULL; p= p->next_same2) {}
402 p->next_same2= t;
403 }
404 }
405 }
406 }
407 }
408
409
replace_triple(locus1,locus2,locus3,d1,d2,d3)410 bool replace_triple(locus1,locus2,locus3,d1,d2,d3)
411 int locus1,locus2,locus3;
412 real d1,d2,d3;
413 {
414 TRIPLE_LIST *p,*new_entry;
415
416 if (three_pt_data->entry[locus1].locus2 == locus2 &&
417 three_pt_data->entry[locus1].locus3 == locus3) {
418 three_pt_data->entry[locus1].delta1= (float) d1;
419 three_pt_data->entry[locus1].delta2= (float) d2;
420 three_pt_data->entry[locus1].delta3= (float) d3;
421 return(TRUE);
422
423 } else {
424
425 for (p=three_pt_data->entry[locus1].next;
426 p->next != NULL &&
427 !(p->next->locus2==locus2 && p->next->locus3==locus3);
428 p=p->next){}
429
430 if (p->next != NULL) {
431 if (three_pt_data->unused == NULL)
432 allocate_three_entries(INIT_3PT_SIZE(raw.num_markers));
433
434 new_entry= three_pt_data->unused;
435 three_pt_data->unused= three_pt_data->unused->next;
436 new_entry->locus2= locus2;
437 new_entry->locus3= locus3;
438 new_entry->delta1= (float) d1;
439 new_entry->delta2= (float) d2;
440 new_entry->delta3= (float) d3;
441 new_entry->next= p->next->next;
442 new_entry->next_same2= p->next->next_same2;
443 p->next= new_entry;
444 /* fix the next_same2 list */
445 if (three_pt_data->entry[locus1].locus2 == locus2) {
446 if (three_pt_data->entry[locus1].next_same2->locus3 == locus3)
447 three_pt_data->entry[locus1].next_same2= new_entry;
448 else
449 p= three_pt_data->entry[locus1].next_same2;
450
451 } else {
452 for (p=three_pt_data->entry[locus1].next; p->locus2!=locus2;
453 p=p->next) {}
454 }
455
456 for (; p->next_same2 != NULL && p->next_same2->locus3 != locus3;
457 p=p->next_same2) {}
458
459 if (p->next_same2 != NULL) {
460 /* i.e., if it isn't the first in the next_same2 list */
461 p->next_same2= new_entry;
462 }
463
464 } else {
465 three_pt_data->entry[locus1].next->delta1= (float) d1;
466 three_pt_data->entry[locus1].next->delta2= (float) d2;
467 three_pt_data->entry[locus1].next->delta3= (float) d3;
468 }
469 }
470 return(TRUE);
471 }
472
473
compute_triple(a,b,c,d1,d2,d3)474 void compute_triple(a,b,c,d1,d2,d3)
475 int a, b, c; /* markers */
476 real *d1, *d2, *d3; /* likelihoods to be filled in */
477 {
478 /* d1 - abc, d2 - acb, d3 - bac */
479 MAP *map;
480 double best,like[3];
481
482 map= allocate_map(3);
483 map->locus[0]= a;
484 map->locus[1]= b;
485 map->locus[2]= c;
486 map->num_loci= 3;
487 init_rec_fracs(map);
488 converge_to_map(map);
489 like[0]= map->log_like;
490 best= like[0];
491
492 map->locus[0]= a;
493 map->locus[1]= c;
494 map->locus[2]= b;
495 map->num_loci= 3;
496 init_rec_fracs(map);
497 converge_to_map(map);
498 like[1]= map->log_like;
499 if (like[1] > best) best= like[1];
500
501 map->locus[0]= b;
502 map->locus[1]= a;
503 map->locus[2]= c;
504 map->num_loci= 3;
505 init_rec_fracs(map);
506 converge_to_map(map);
507 like[2]= map->log_like;
508 if (like[2] > best) best= like[2];
509
510 *d1= like[0] - best;
511 *d2= like[1] - best;
512 *d3= like[2] - best;
513
514 insert_triple(a,b,c,like[0]-best,like[1]-best,like[2]-best);
515 three_pt_touched= TRUE;
516 }
517
518
restore_triple(locus1,locus2,locus3,d1,d2,d3)519 bool restore_triple(locus1,locus2,locus3,d1,d2,d3)
520 int locus1,locus2,locus3;
521 real *d1,*d2,*d3;
522 {
523 /* d1= 123, d2= 132, d3= 213 -
524 these transformations are correct! */
525
526 if (locus2 < locus1) {
527 if (locus3 < locus2)
528 return(get_triple(locus3,locus2,locus1,d1,d3,d2));
529 else if (locus3 < locus1)
530 return(get_triple(locus2,locus3,locus1,d2,d3,d1));
531 else
532 return(get_triple(locus2,locus1,locus3,d3,d2,d1));
533
534 } else {
535 if (locus3 < locus1)
536 return(get_triple(locus3,locus1,locus2,d3,d1,d2));
537 else if (locus3 < locus2)
538 return(get_triple(locus1,locus3,locus2,d2,d1,d3));
539 else
540 return(get_triple(locus1,locus2,locus3,d1,d2,d3));
541 }
542 }
543
544
get_triple(locus1,locus2,locus3,d1,d2,d3)545 bool get_triple(locus1,locus2,locus3,d1,d2,d3)
546 int locus1,locus2,locus3;
547 real *d1,*d2,*d3;
548 {
549 TRIPLE_LIST *p;
550
551 if (three_pt_data->entry[locus1].locus2 == locus2) {
552 if (three_pt_data->entry[locus1].locus3 == locus3) {
553 *d1= (real) three_pt_data->entry[locus1].delta1;
554 *d2= (real) three_pt_data->entry[locus1].delta2;
555 *d3= (real) three_pt_data->entry[locus1].delta3;
556 return(TRUE);
557 } else {
558 if (!same_2_loop(three_pt_data->entry[locus1].next_same2,locus3,
559 d1,d2,d3))
560 return(FALSE);
561 else
562 return(TRUE);
563 }
564 } else {
565 p= three_pt_data->entry[locus1].next;
566 if (p == NULL) return(FALSE);
567 while(p->locus2 != locus2) {
568 if (p->next == NULL)
569 return(FALSE);
570 p= p->next;
571 }
572 if (!same_2_loop(p,locus3,d1,d2,d3))
573 return(FALSE);
574 else
575 return(TRUE);
576 }
577 }
578
579
same_2_loop(p,locus3,d1,d2,d3)580 bool same_2_loop(p,locus3,d1,d2,d3)
581 TRIPLE_LIST *p;
582 int locus3;
583 real *d1,*d2,*d3;
584 {
585 if (p == NULL) return(FALSE);
586 while(p->locus3 != locus3) {
587 if (p->next_same2 == NULL)
588 return(FALSE);
589 p= p->next_same2;
590 }
591 *d1= (real) p->delta1;
592 *d2= (real) p->delta2;
593 *d3= (real) p->delta3;
594 return(TRUE);
595 }
596
597
three_linked(locus,lodbound,thetabound,num_links,sex)598 bool three_linked(locus,lodbound,thetabound,num_links,sex)
599 int *locus; /* array of 3 locus numbers */
600 real lodbound, thetabound;
601 int num_links;
602 bool sex;
603 {
604 int x, i, j, count;
605 real lod, theta, thetam, thetaf;
606
607 count=0;
608 for (x=0; x<3; x++) {
609 i= locus[x];
610 j= locus[(x+1) % 3];
611 if (!sex) {
612 get_two_pt(i,j,&lod,&theta);
613 if ((lod>=lodbound) && (theta<=thetabound)) count++;
614 } else {
615 get_sex_two_pt(i,j,&lod,&thetam,&thetaf);
616 if (lod>=lodbound && (thetam<=thetabound || thetaf<=thetabound))
617 count++;
618 }
619 }
620 return (count>=num_links);
621 }
622
623
compute_3pt(seq,sex,trip_err_rate,like,map)624 void compute_3pt(seq,sex,trip_err_rate,like,map)
625 SEQ_NODE *seq;
626 bool sex;
627 real trip_err_rate, *like;
628 MAP *map;
629 {
630 int i, k;
631 real best;
632
633 k=0;
634 for_all_orders(seq,map) {
635 init_for_ctm(map,sex,trip_err_rate!=0.0,MAYBE);
636 if (trip_err_rate==LOCUS_ERROR_RATE)
637 for (i=0; i<3; i++) map->error_rate[i]= error_rate[map->locus[i]];
638 else if (trip_err_rate>0.0)
639 for (i=0; i<3; i++) map->error_rate[i]= trip_err_rate;
640 converge_to_map(map);
641 like[k++]=map->log_like;
642 }
643 for (i=0, best=VERY_UNLIKELY; i<3; i++) best=rmaxf(best,like[i]);
644 for (i=0; i<3; i++) like[i]-= best;
645
646 insert_triple(map->locus[0],map->locus[1],map->locus[2],
647 like[2],like[1],like[0]);
648 }
649
650
651
652 /***************************** Haplotype-Groups *****************************/
653
setup_haplo_group(locus,num_loci)654 void setup_haplo_group(locus,num_loci)
655 int *locus, num_loci;
656 {
657 int first, i;
658
659 first= locus[0];
660 for (i=0; i<num_loci; i++) { /* build as a linked list */
661 haplo_first[locus[i]]= first;
662 if (i==num_loci-1) haplo_next[locus[i]]= NO_LOCUS; /*end*/
663 else haplo_next[locus[i]]= locus[i+1];
664 }
665 }
666
667
delete_haplo_groups(locus,num_loci,old_locus,num_old)668 bool delete_haplo_groups(locus,num_loci,old_locus,num_old)
669 int *locus, num_loci, *old_locus, *num_old;
670 /* delete ALL haplo groups containing any of these loci */
671 {
672 int i, j, next, any;
673
674 any=FALSE; *num_old=0;
675 for (i=0; i<num_loci; i++)
676 if ((j=haplo_first[locus[i]])!=NO_LOCUS) {
677 any=TRUE;
678 do {
679 old_locus[(*num_old)++]=j;
680 next=haplo_next[j];
681 haplo_next[j]= NO_LOCUS;
682 haplo_first[j]=NO_LOCUS;
683 } while ((j=next)!=NO_LOCUS);
684 }
685 return (any);
686 }
687
688
689 /* msg is designed to look like those in chroms.c */
690 #define INSANE "%s- not the name of its haplotype-group, using %s\n"
691
force_haplo_sanity(locus,verbose)692 bool force_haplo_sanity(locus,verbose)
693 int *locus; /* just a ptr to one int, maybe side-effected */
694 bool verbose;
695 {
696 if (haplo_first[*locus]==NO_LOCUS || *locus==haplo_first[*locus])
697 return(TRUE);
698 if (verbose)
699 { sf(ps,INSANE,loc2str(*locus),rag(loc2str(haplo_first[*locus])));pr(); }
700 *locus=haplo_first[*locus];
701 return(FALSE);
702 }
703
704
705 /**************** Classes ****************/
706 /* defined here because they are easiest to save/load in the table */
707
isa_class(name,num)708 bool isa_class(name,num)
709 char *name;
710 int *num;
711 {
712 int i, classnum= -1;
713 if (streq(name,"")) return(FALSE);
714 for (i=0; i<NUM_CLASSES; i++)
715 if (class_name[i][0]!='\0' && xstreq(name,class_name[i]))
716 { classnum=i; break; }
717 if (classnum==-1) return(FALSE);
718 if (num!=NULL) *num=classnum;
719 return(TRUE);
720 }
721
722
make_new_class(name,why_not)723 bool make_new_class(name,why_not)
724 char *name;
725 char **why_not;
726 {
727 int i, classnum=-1;
728
729 if (!valid_name(name)) /* checks non-null */
730 { *why_not=ptr_to("illegal name"); return(FALSE); }
731 else if (!valid_new_name(name)) /* check class names too */
732 { *why_not=ptr_to("name is already in use"); return(FALSE); }
733
734 for (i=0; i<NUM_CLASSES; i++)
735 if (class_name[i][0]=='\0') { classnum=i; break; }
736 if (classnum==-1)
737 { *why_not=ptr_to("no more classes can be defined"); return(FALSE); }
738 strcpy(class_name[classnum],name);
739 return(TRUE);
740 }
741
742
print_class_names()743 void print_class_names() /* let print() auto_wrap... */
744 {
745 int i;
746 for (i=0; i<NUM_CLASSES; i++) { print(class_name[i]); print(" "); }
747 }
748
749
750
751 /**************** Other Stuff, and Alloc/Save/Load/Bash Funcs ****************/
752
753
754 #define BASH_ORDER1 \
755 "Resetting group and order information for entire data set...\n"
756 #define BASH_ORDER2 \
757 "Resetting age, class, and error-prob information for joined loci...\n"
758 #define BASH_ORDER3 \
759 "Resetting two-point and three-point information for joined loci...\n"
760
761 void return_to_unused(), remove_triple_list();
762
bash_order_info(changed,num_changed)763 void bash_order_info(changed,num_changed)
764 int *changed, num_changed;
765 {
766 int a, b, i, j, locus;
767 TRIPLE_LIST *p, *q, *prev;
768 /* this is merciless - might do *something* smarter */
769
770 print(BASH_ORDER1); /* bash all groups/orders */
771 num_groups=num_orders=0;
772 for (i=0; i<raw.num_markers; i++) {
773 my_group[i]=NO_GROUP;
774 order_next[i]=NO_LOCUS;
775 }
776 print(BASH_ORDER2); /* bash these age/class/error_probs */
777 for (i=0; i<num_changed; i++) {
778 modified[changed[i]]= FALSE;
779 class[changed[i]]= NO_CLASS;
780 error_rate[changed[i]]= DEFAULT_ERROR_RATE;
781 }
782
783 print(BASH_ORDER3); /* bash two-point data, garbage collecting */
784 for (i=0; i<num_changed; i++)
785 for (j=0; j<raw.num_markers; j++) {
786 if (changed[i]>j) { a=changed[i]; b=j; } else { a=j; b=changed[i]; }
787 if (two_pt_data[a][b]==NULL) continue;
788 two_pt_data[a][b]->used=FALSE;
789 two_pt_data[a][b]=NULL;
790 /* while (two_pt_used>0 && !(two_pt_list[two_pt_used-1]->used))
791 two_pt_used--; */
792 two_pt_collect=0;
793 }
794
795 /* bash three-point data */
796 for (i=0; i<num_changed; i++) {
797 three_pt_touched=TRUE;
798 a=changed[i];
799 /* first get rid of triple_list starting with marker a */
800 if (three_pt_data->entry[a].next != NULL)
801 remove_triple_list(three_pt_data->entry[a].next);
802 three_pt_data->entry[a].next= NULL;
803 three_pt_data->entry[a].next_same2= NULL;
804 three_pt_data->entry[a].locus2= NO_LOCUS;
805 three_pt_data->entry[a].locus3= NO_LOCUS;
806 three_pt_data->entry[a].delta1= UNFILLED;
807 three_pt_data->entry[a].delta2= UNFILLED;
808 three_pt_data->entry[a].delta3= UNFILLED;
809
810 /* now get rid of all others in which a appears */
811
812 for (j=0; j<a; j++) {
813 prev=NULL;
814 p=three_pt_data->entry[j].next;
815 while (p != NULL) {
816 if (p->locus2==a || p->locus3==a) {
817 /* go back through list and find next_same2 pointing at
818 this one, set it equal to whatever this next_same2
819 points at */
820 if (three_pt_data->entry[j].next_same2==p) {
821 three_pt_data->entry[j].next_same2=p->next_same2;
822 } else {
823 q=three_pt_data->entry[j].next;
824 while (q != p && q != NULL) {
825 if (q->next_same2 == p) {
826 q->next_same2=p->next_same2;
827 break;
828 }
829 q=q->next;
830 }
831 }
832 /* fix the next pointer */
833 if (prev==NULL) three_pt_data->entry[j].next=p->next;
834 else prev->next=p->next;
835
836 /* set p for next pass through loop, prev remains the same */
837 q=p->next;
838
839 /* now p is completely disconnected, return it to unused */
840 return_to_unused(p);
841
842 p=q;
843 } else {
844 prev=p;
845 p=p->next;
846 }
847 }
848
849 /* now all have been checked except the initial entry[j] */
850 if (three_pt_data->entry[j].locus2==a ||
851 three_pt_data->entry[j].locus3==a) {
852 p=three_pt_data->entry[j].next;
853 if (p!= NULL) {
854 three_pt_data->entry[j].locus2=p->locus2;
855 three_pt_data->entry[j].locus3=p->locus3;
856 three_pt_data->entry[j].delta1=p->delta1;
857 three_pt_data->entry[j].delta2=p->delta2;
858 three_pt_data->entry[j].delta3=p->delta3;
859 three_pt_data->entry[j].next=p->next;
860 three_pt_data->entry[j].next_same2=p->next_same2;
861 /* initial entry[j] has been set to be what was previously
862 entry[j].next, so send the entry[j].next back to unused */
863 return_to_unused(p);
864 } else {
865 three_pt_data->entry[j].next= NULL;
866 three_pt_data->entry[j].next_same2= NULL;
867 three_pt_data->entry[j].locus2= NO_LOCUS;
868 three_pt_data->entry[j].locus3= NO_LOCUS;
869 three_pt_data->entry[j].delta1= UNFILLED;
870 three_pt_data->entry[j].delta2= UNFILLED;
871 three_pt_data->entry[j].delta3= UNFILLED;
872 }
873 }
874 }
875 }
876 }
877
return_to_unused(p)878 void return_to_unused(p)
879 TRIPLE_LIST *p;
880 {
881 /* Places a previously allocated but now unused elemtn on top of the
882 unused stack */
883
884 p->next=NULL;
885 p->next_same2= NULL;
886 p->locus2= p->locus3= NO_LOCUS;
887 p->delta1= p->delta2= p->delta3= UNFILLED;
888
889 p->next= three_pt_data->unused;
890 three_pt_data->unused= p;
891 }
892
remove_triple_list(p)893 void remove_triple_list(p)
894 TRIPLE_LIST *p;
895 {
896 if (p->next!=NULL) remove_triple_list(p->next);
897 return_to_unused(p);
898 }
899
900
allocate_order_data(num_markers)901 void allocate_order_data(num_markers)
902 int num_markers;
903 {
904 int i;
905
906 array(my_group, num_markers, int);
907 array(haplo_first, num_markers, int);
908 array(haplo_next, num_markers, int);
909 array(order_first, num_markers, int);
910 array(unorder_first, num_markers, int);
911 array(order_next, num_markers, int);
912 array(class, num_markers, int);
913 array(modified, num_markers, int);
914 array(error_rate, num_markers, real);
915 matrix(note, num_markers, MAX_NOTE_LEN+1, char);
916
917 num_groups=num_orders=0;
918 for(i=0; i < num_markers; i++) {
919 modified[i]= FALSE;
920 class[i]= NO_CLASS;
921 my_group[i]= NO_GROUP;
922 error_rate[i]= DEFAULT_ERROR_RATE;
923 haplo_first[i]= haplo_next[i]= NO_LOCUS;
924 order_first[i]= unorder_first[i]= NO_LOCUS;
925 }
926
927 matrix(class_name, NUM_CLASSES, NAME_LEN+1, char);
928 strcpy(class_name[0],"no_class");
929 /* for (i=1; i<NUM_CLASSES; i++) sprintf(class_name[i],"class%d",i); */
930 for (i=1; i<NUM_CLASSES; i++) sprintf(class_name[i],"");
931 }
932
933
free_order_data(num_markers)934 void free_order_data(num_markers)
935 int num_markers;
936 {
937 unarray(my_group, int);
938 unarray(haplo_first, int);
939 unarray(haplo_next, int);
940 unarray(order_first, int);
941 unarray(unorder_first, int);
942 unarray(order_next, int);
943 num_groups=0; num_orders=0;
944
945 num_groups=0; num_orders=0;
946 unarray(modified, int);
947 unarray(error_rate, real);
948 unmatrix(note,num_markers,char);
949
950 unarray(class, int);
951 unmatrix(class_name, NUM_CLASSES+1, char);
952 }
953
954
write_order_data(fp)955 void write_order_data(fp)
956 FILE *fp;
957 {
958 int i, locus;
959
960 sf(ps,"*OrderInfo: %d %d\n",num_groups,num_orders); fpr(fp);
961 for (locus=0; locus < raw.num_markers; locus++) {
962 sf(ps,"*%-8s %4d %7.5lf %4d %4d %4d %4d %4d %4d %4d\n",
963 raw.locus_name[locus],modified[locus],error_rate[locus],
964 my_group[locus], haplo_first[locus], haplo_next[locus],
965 order_first[locus], unorder_first[locus], order_next[locus],
966 class[locus]);
967 fpr(fp);
968 }
969 fprint(fp,WRS("*Classes:\n"));
970 for (i=0; i<NUM_CLASSES; i++)
971 { fprint(fp,WRS("*")); fprint(fp,class_name[i]); fnl(fp); }
972 }
973
974
read_order_data(fp)975 void read_order_data(fp)
976 FILE *fp;
977 {
978 int i, locus;
979 int mod, group, first, next, ord_first, un_first, ord_next, class_num;
980 real rate;
981 char temp_locus_name[NAME_LEN+2], word[TOKLEN+1];
982
983 fgetln_(fp);
984 if (sscanf(ln,"%s %d %d",word,&num_groups,&num_orders)!=3 ||
985 !streq(word,"*OrderInfo:")) baddata("expected '*OrderInfo: # #'");
986
987 for (locus=0; locus < raw.num_markers; locus++) {
988 fgetln_(fp);
989
990 if (!nstoken(&ln,sREQUIRED,temp_locus_name,NAME_LEN+1) ||
991 temp_locus_name[0]!='*' || len(temp_locus_name)<2)
992 baddata("expected *name");
993 else if (!streq(raw.locus_name[locus],&temp_locus_name[1]))
994 baddata("locus names don't match");
995
996 if (sscanf(ln,"%d %lf %d %d %d %d %d %d %d",&mod,&rate,&group,
997 &first,&next,&ord_first,&un_first,&ord_next,&class_num)!=9)
998 baddata("bad order info line");
999
1000 modified[locus]= mod;
1001 error_rate[locus]= rate;
1002 my_group[locus]= group;
1003 haplo_first[locus]= first;
1004 haplo_next[locus]= next;
1005 order_first[locus]= ord_first;
1006 unorder_first[locus]= un_first;
1007 order_next[locus]= ord_next;
1008 class[locus]= class_num;
1009 }
1010 fgetln_(fp); if (!streq(ln,"*Classes:")) baddata("bad classes");
1011 for (i=0; i<NUM_CLASSES; i++)
1012 { fgetln_(fp); strcpy(class_name[i],ln+1); }
1013 }
1014
1015
read_two_pt(fp)1016 void read_two_pt(fp)
1017 FILE *fp;
1018 {
1019 int a, b, n;
1020 int i, j, k, n_unlinked=0, n_miss=0, n_real=0, n_to_read;
1021 bool inc, missing_means_unlinked=FALSE; /* eg missing means missing */
1022 real theta, lod, thetam, thetaf, lodsex;
1023 TWO_PT_DATA *new_two;
1024
1025 getdataln(fp);
1026 if (sscanf(ln,"%d %d %d",&n_real,&n_unlinked,&n_miss)!=3)
1027 baddata("expected two-pt count");
1028 if (n_unlinked>n_miss) missing_means_unlinked=TRUE;
1029 expand_two_pt(n_real+2);
1030
1031 if (missing_means_unlinked) {
1032 for (i=0; i<raw.num_markers; i++)
1033 for (j=0; j<=i; j++)
1034 two_pt_data[i][j]=UNLINKED_TWO_PT;
1035 }
1036 if (missing_means_unlinked) n_to_read= n_real + n_miss;
1037 else n_to_read= n_real + n_unlinked;
1038
1039 run {
1040 for (k=0; k<n_to_read; k++) {
1041 getdataln(fp);
1042 n= sscanf(ln,"%d %d %lf %lf %lf %lf %lf",&a,&b,&theta,&lod,
1043 &thetam,&thetaf,&lodsex);
1044 new_two= get_next_two_pt_entry(a,b,&inc);
1045 if (n==2) {
1046 if (missing_means_unlinked) two_pt_data[a][b]= NULL;
1047 else two_pt_data[a][b]= UNLINKED_TWO_PT;
1048 } else if(n==4) {
1049 new_two->theta[NOSEX]= theta;
1050 new_two->lodscore[NOSEX]= lod;
1051 two_pt_data[a][b]= new_two;
1052 new_two->used=TRUE;
1053 two_pt_used++;
1054 } else if (n==7) {
1055 new_two->theta[NOSEX]= theta;
1056 new_two->lodscore[NOSEX]= lod;
1057 new_two->theta[MALE]= thetam;
1058 new_two->theta[FEMALE]= thetaf;
1059 new_two->lodscore[SEXSPEC]= lodsex;
1060 two_pt_data[a][b]= new_two;
1061 new_two->used=TRUE;
1062 if (inc) two_pt_used++;
1063 }
1064 }
1065 } except_when(ENDOFILE) { }
1066 }
1067
1068
write_two_pt(fp)1069 void write_two_pt(fp)
1070 FILE *fp;
1071 {
1072 int i, j, n_unlinked=0, n_miss=0, n_real=0;
1073 bool missing_means_unlinked=FALSE; /* eg missing means missing */
1074
1075 for (i=0; i<raw.num_markers-1; i++)
1076 for (j=0; j<=i; j++) {
1077 if (two_pt_data[i][j]==NULL) n_miss++;
1078 else if (two_pt_data[i][j]==UNLINKED_TWO_PT) n_unlinked++;
1079 else n_real++;
1080 }
1081
1082 if (n_unlinked>n_miss) missing_means_unlinked=TRUE;
1083 sf(ps,"%d %d %d \n",n_real,n_unlinked,n_miss); fpr(fp);
1084
1085 for (i=0; i<raw.num_markers-1; i++) {
1086 for (j=0; j<=i; j++) {
1087 if (two_pt_data[i][j]==NULL) {
1088 if (!missing_means_unlinked) continue;
1089 else sf(ps,"%d %d\n",i,j); fpr(fp);
1090 } else if(two_pt_data[i][j] == UNLINKED_TWO_PT) {
1091 if (missing_means_unlinked) continue;
1092 sf(ps,"%d %d\n",i,j); fpr(fp);
1093 } else if (raw.data_type == F2) {
1094 sf(ps,"%d %d %.3lf %.3lf\n",i,j,
1095 two_pt_data[i][j]->theta[NOSEX],
1096 two_pt_data[i][j]->lodscore[NOSEX]); fpr(fp);
1097 } else {
1098 sf(ps,"%d %d %.3lf %.3lf %.3lf %.3lf %.3lf\n",i,j, /* lf */
1099 two_pt_data[i][j]->theta[NOSEX],
1100 two_pt_data[i][j]->lodscore[NOSEX],
1101 two_pt_data[i][j]->theta[MALE],
1102 two_pt_data[i][j]->theta[FEMALE],
1103 two_pt_data[i][j]->lodscore[SEXSPEC]); fpr(fp);
1104 }
1105 }
1106 }
1107 }
1108
1109
read_three_pt(fp)1110 void read_three_pt(fp)
1111 FILE *fp;
1112 {
1113 int i,j,k,filenum;
1114 real d1,d2,d3;
1115
1116 while((fscanf(fp,"%d %d %d %lf %lf %lf\n",&i,&j,&k,&d1,&d2,&d3)) == 6) {
1117 insert_triple(i,j,k,d1,d2,d3);
1118 }
1119 three_pt_touched= FALSE;
1120 }
1121
1122
write_three_pt(fp)1123 void write_three_pt(fp)
1124 FILE *fp;
1125 {
1126 int i, j;
1127 TRIPLE_LIST *p;
1128
1129 for (i= 0; i < raw.num_markers; i++) {
1130 if (three_pt_data->entry[i].locus2 != NO_LOCUS) {
1131 sf(ps,"%d %d %d %.3lf %.3lf %.3lf\n",i,
1132 three_pt_data->entry[i].locus2,three_pt_data->entry[i].locus3,
1133 three_pt_data->entry[i].delta1,three_pt_data->entry[i].delta2,
1134 three_pt_data->entry[i].delta3);
1135 fpr(fp);
1136 p= three_pt_data->entry[i].next;
1137 while(p != NULL) {
1138 sf(ps,"%d %d %d %.3lf %.3lf %.3lf\n",i,p->locus2,p->locus3,
1139 p->delta1,p->delta2,p->delta3);
1140 fpr(fp);
1141 p= p->next;
1142 }
1143 }
1144 }
1145 }
1146