1 #ifndef GETDATA_INCLUDE
2 #include "getdata.h"
3 #endif
4
5 #ifdef DMEMDEBUG
6 #define MEMDEBUG
7 #include "memdebug.h"
8 #endif
9
10 unsigned int baseA=0, baseC=1, baseG=2, baseT=3;
11
12 extern FILE *simlog, *weightfile;
13 extern long locus, population;
14
15 extern void read_line(FILE *source, char *line);
16 extern void setupmsatdata(msatdata **ms, long numpop, long numloci,
17 long numind);
18 extern void freemsatdata(msatdata *ms);
19 extern void calculate_steps(msatdata *ms);
20 extern boolean isinvariant(option_struct *op, data_fmt *data, long site,
21 creature *cr);
22
23
24 /**********************************************************
25 * setupdata() initializes the appropiate data structures */
setupdata(data_fmt * data,char datatype,long numpop,long numloci,long numind,long * numsites,char * title)26 void setupdata(data_fmt *data, char datatype, long numpop, long numloci,
27 long numind, long *numsites, char *title)
28 {
29
30 switch(datatype) {
31 case 'a':
32 data->dnaptr = NULL;
33 data->msptr = NULL;
34 break;
35 case 'b':
36 case 'm':
37 data->dnaptr = NULL;
38 setupmsatdata(&(data->msptr),numpop,numloci,numind);
39 strcpy(data->msptr->title,title);
40 calculate_steps(data->msptr);
41 break;
42 case 'n':
43 case 's':
44 setupdnadata(&(data->dnaptr),numsites,numind,numloci,numpop);
45 strcpy(data->dnaptr->title,title);
46 data->msptr = NULL;
47 break;
48 default:
49 fprintf(ERRFILE,"setupdata, found an unknown datatype %c\n",
50 datatype);
51 exit(-1);
52 }
53 } /* setupdata */
54
55
56 /***********************************************
57 * freedata() frees the general data structure */
freedata(data_fmt * data)58 void freedata(data_fmt *data)
59 {
60
61 if (data->dnaptr) {freednadata(data->dnaptr); data->dnaptr = NULL;}
62 if (data->msptr) {freemsatdata(data->msptr); data->msptr = NULL;}
63 } /* freedata */
64
65
66 /********************************************************************
67 * getdata_numpop() returns the number of populations stored in the *
68 * general data structure or FLAGLONG if it fails. */
getdata_numpop(option_struct * op,data_fmt * data)69 long getdata_numpop(option_struct *op, data_fmt *data)
70 {
71
72 switch(op->datatype)
73 {
74 case 'a':
75 break;
76 case 'b':
77 case 'm':
78 return(data->msptr->numpop);
79 break;
80 case 'n':
81 case 's':
82 return(data->dnaptr->numpop);
83 break;
84 default:
85 return(FLAGLONG);
86 break;
87 }
88
89 return(FLAGLONG);
90
91 } /* getdata_numpop */
92
93
94 /**************************************************************
95 * getdata_numloci() returns the number of loci stored in the *
96 * general data structure or FLAGLONG if it fails. */
getdata_numloci(option_struct * op,data_fmt * data)97 long getdata_numloci(option_struct *op, data_fmt *data)
98 {
99
100 switch(op->datatype)
101 {
102 case 'a':
103 break;
104 case 'b':
105 case 'm':
106 return(data->msptr->numloci);
107 break;
108 case 'n':
109 case 's':
110 return(data->dnaptr->numloci);
111 break;
112 default:
113 return(FLAGLONG);
114 break;
115 }
116
117 return(FLAGLONG);
118
119 } /* getdata_numloci */
120
121
122 /********************************************************************
123 * getdata_nummarkers() returns the number of markers that the user *
124 * provides in the input data. FLAGLONG is returned on failure. */
getdata_nummarkers(option_struct * op,data_fmt * data)125 long getdata_nummarkers(option_struct *op, data_fmt *data)
126 {
127
128 switch(op->datatype)
129 {
130 case 'a':
131 break;
132 case 'b':
133 case 'm':
134 return(data->msptr->numloci);
135 break;
136 case 'n':
137 case 's':
138 return(data->dnaptr->sites[locus]);
139 break;
140 default:
141 return(FLAGLONG);
142 break;
143 }
144
145 return(FLAGLONG);
146
147 } /* getdata_nummarkers */
148
149
150 /**********************************************************************
151 * getdata_numseq() returns the number of individual sequences in the *
152 * data stored in the general data structure or FLAGLONG if it fails. */
getdata_numseq(option_struct * op,data_fmt * data)153 long getdata_numseq(option_struct *op, data_fmt *data)
154 {
155
156 switch(op->datatype)
157 {
158 case 'a':
159 break;
160 case 'b':
161 case 'm':
162 return(data->msptr->numind[population]*NUMCHROM);
163 break;
164 case 'n':
165 case 's':
166 return(data->dnaptr->numseq[population]);
167 break;
168 default:
169 return(FLAGLONG);
170 break;
171 }
172
173 return(FLAGLONG);
174
175 } /* getdata_numseq */
176
177
178 /********************************************************************
179 * getdata_numtips() returns the number of tips present in the tree *
180 * stored in the general data structure or FLAGLONG if it fails. */
getdata_numtips(option_struct * op,data_fmt * data)181 long getdata_numtips(option_struct *op, data_fmt *data)
182 {
183
184 switch(op->datatype)
185 {
186 case 'a':
187 break;
188 case 'b':
189 case 'm':
190 return(data->msptr->numind[population]*NUMCHROM);
191 break;
192 case 'n':
193 if (op->panel)
194 return(data->dnaptr->numseq[population] +
195 op->numpanel[population]);
196 case 's':
197 return(data->dnaptr->numseq[population]);
198 break;
199 default:
200 return(FLAGLONG);
201 break;
202 }
203
204 return(FLAGLONG);
205
206 } /* getdata_numtips */
207
208
209 /*******************************************************************
210 * getdata_sitecount() returns the contents of the sitecount array *
211 * at the passed marker or FLAGLONG if it fails. */
getdata_sitecount(option_struct * op,data_fmt * data,long marker)212 long getdata_sitecount(option_struct *op, data_fmt *data, long marker)
213 {
214
215 switch(op->datatype)
216 {
217 case 'a':
218 break;
219 case 'b':
220 case 'm':
221 break;
222 case 'n':
223 case 's':
224 return(data->dnaptr->sitecount[locus][marker]);
225 break;
226 default:
227 return(FLAGLONG);
228 break;
229 }
230
231 return(FLAGLONG);
232
233 } /* getdata_sitecount */
234
235
236 /*********************************************************************
237 * getdata_markersite() returns the contents of the markersite array *
238 * at the passed marker or FLAGLONG if it fails. */
getdata_markersite(option_struct * op,data_fmt * data,long marker)239 long getdata_markersite(option_struct *op, data_fmt *data, long marker)
240 {
241
242 switch(op->datatype)
243 {
244 case 'a':
245 break;
246 case 'b':
247 case 'm':
248 break;
249 case 'n':
250 case 's':
251 return(data->dnaptr->markersite[locus][marker]);
252 break;
253 default:
254 return(FLAGLONG);
255 break;
256 }
257
258 return(FLAGLONG);
259
260 } /* getdata_markersite */
261
262
263 /*******************************************************************
264 * getdata_space() returns the length of the passed link stored in *
265 * the general data structure or FLAGLONG if it fails. */
getdata_space(option_struct * op,data_fmt * data,long target)266 double getdata_space(option_struct *op, data_fmt *data, long target)
267 {
268
269 switch(op->datatype)
270 {
271 case 'a':
272 break;
273 case 'b':
274 case 'm':
275 return(data->msptr->mspace[population][locus][target]);
276 break;
277 case 'n':
278 if (target == 0) {
279 return(data->dnaptr->sspace[population][locus][target] +
280 data->dnaptr->sspace[population][locus][getdata_nummarkers(op,data)]);
281 break;
282 }
283 case 's':
284 return(data->dnaptr->sspace[population][locus][target]);
285 break;
286 default:
287 return(FLAGLONG);
288 break;
289 }
290
291 return(FLAGLONG);
292
293 } /* getdata_space */
294
295
296 /***************************************************************
297 * read_spacefile() reads the spacing info from the designated *
298 * spacefile. *
299 * *
300 * entry "0" from a spacefile is read as the number of spaces *
301 * before the first marker. */
read_spacefile(FILE * file,option_struct * op,data_fmt * data)302 void read_spacefile(FILE *file, option_struct *op, data_fmt *data)
303 {
304 long numgaps, gapcnt, markernum;
305 char *input,*tok;
306 dnadata *dna;
307
308 input = (char *)calloc(LINESIZE,sizeof(char));
309 *input = '\0';
310
311 dna = data->dnaptr;
312
313 read_line(file,input);
314 sscanf(input,"%ld%c[^\n]",&numgaps,&(dna->sdlm));
315
316 for(gapcnt = 0; gapcnt < numgaps; gapcnt++) {
317 read_line(file,input);
318 tok = strtok(input,&dna->sdlm);
319 markernum = atol(tok) - 1;
320 /* if the entry is supposed to be the spacing before the first
321 marker, then put it at its proper place */
322 if (markernum == -1) markernum = getdata_nummarkers(op,data);
323 dna->sspace[population][locus][markernum] =
324 (double)atof(strtok(NULL," "));
325 }
326
327 } /* read_spacefile */
328
329
330 /*****************************************************************
331 * longcmp() is a qsort helper for longs, thanks to Peter Beerli */
longcmp(const void * v1,const void * v2)332 int longcmp (const void *v1, const void *v2)
333 {
334 if (*(long *) v1 < *(long *) v2)
335 {
336 return -1;
337 }
338 else
339 {
340 if (*(long *) v1 > *(long *) v2)
341 {
342 return 1;
343 }
344 else
345 return 0;
346 }
347 } /* longcmp */
348
349
350 /*********************************************
351 * doublecmp() is a qsort helper for doubles */
doublecmp(const void * v1,const void * v2)352 int doublecmp (const void *v1, const void *v2)
353 {
354 if (*(double *) v1 < *(double *) v2)
355 {
356 return -1;
357 }
358 else
359 {
360 if (*(double *) v1 > *(double *) v2)
361 {
362 return 1;
363 }
364 else
365 return 0;
366 }
367 } /* doublecmp */
368
369
370 /********************************************************************
371 * read_flipfile() reads the questionable site information from *
372 * the passed file. *
373 * *
374 * read_flipfile() can only be called after treesetup(). *
375 * *
376 * Expected file format of a flipfile: *
377 * 1st line the number of individuals covered by the flipfile *
378 * *
379 * then, for each individual in the file: *
380 * a line containing one of the following *
381 * "all" -> use if all the site are flippable. *
382 * *
383 * numbers seperated by spaces -> use if you're specifying *
384 * which sites are flippable. *
385 * (e.g. "5 4 67 89 32 100" would mean that there are 5 *
386 * flippable sites with position numbers 4, 32, 67, 89 *
387 * and 100) *
388 * *
389 * an exclamation point immediately followed by a list of *
390 * numbers seperated by spaces -> use if you're specifying *
391 * which sites are NOT flippable. *
392 * (e.g. "!5 4 67 89 32 100" means that there are 5 *
393 * non-flippable sites with position numbers 4, 32, 67, *
394 * 89 and 100) *
395 * *
396 * the last line should just contain the word "end". */
read_flipfile(option_struct * op,data_fmt * data,tree * tr,FILE * file)397 void read_flipfile(option_struct *op, data_fmt *data, tree *tr, FILE *file)
398 {
399 long i, j, whichcreature, numcreatures, site, numsites, temp, *tempa;
400 char *fileline, *fline, *num, char1;
401 creature *cr;
402
403 fscanf(file,"%ld",&numcreatures);
404 if (numcreatures != getdata_numtips(op,data)/NUMHAPLOTYPES) {
405 fprintf(ERRFILE,"\nERROR--number of sequences in infile and number");
406 fprintf(ERRFILE," of individuals in flipfile don't match!\n\n");
407 exit(-1);
408 }
409
410 numsites = getdata_nummarkers(op,data);
411
412 for(whichcreature = 0; whichcreature < numcreatures; whichcreature++) {
413 fline = fileline = (char *)calloc(LINESIZE,sizeof(char));
414 read_line(file,fileline);
415 char1 = fileline[0];
416 if (char1 != 'a' && char1 != '!' && !isdigit(char1)) {
417 fprintf(ERRFILE,"\nERROR--not a valid flipfile entry for line");
418 fprintf(ERRFILE," containing:\n %s\n",fileline);
419 exit(-1);
420 }
421 cr = &(tr->creatures[whichcreature]);
422 switch(char1) {
423 case 'a':
424 cr->numflipsites = numsites;
425 cr->flipsites = (long *)calloc(numsites,sizeof(long));
426 for(site = 0; site < numsites; site++) cr->flipsites[site] = site;
427 break;
428 case '!':
429 fileline++;
430 temp = atol(strtok(fileline," "));
431 cr->numflipsites = numsites-temp;
432 tempa = (long *)calloc(temp,sizeof(long));
433 for(i = 0; i < temp; i++) tempa[i] = atol(strtok(NULL," "));
434 qsort((void *)tempa,temp,sizeof(long),longcmp);
435 cr->flipsites = (long *)calloc(numsites-temp,sizeof(long));
436 for(site = 0, i = 0, j = 0; site < numsites; site++, j++) {
437 if (i < temp) {
438 if (site == tempa[i]) {
439 j--;
440 i++;
441 continue;
442 }
443 }
444 cr->flipsites[j] = site;
445 }
446 free(tempa);
447 break;
448 default: /* assume we're dealing with simply digits */
449 cr->numflipsites = atol(strtok(fileline," "));
450 cr->flipsites = (long *)calloc(cr->numflipsites,sizeof(long));
451 for(site = 0; site < cr->numflipsites; site++) {
452 num = strtok(NULL," ");
453 if (!num) {
454 fprintf(ERRFILE,"ERROR--in flipfile, creature %ld",
455 whichcreature);
456 fprintf(ERRFILE," %ldth entry, failure to read.\n",
457 site+1);
458 exit(-1);
459 }
460 cr->flipsites[site] = atol(num);
461 /* cr->flipsites[site] = atol(strtok(NULL," ")); */
462 }
463 qsort((void *)cr->flipsites,cr->numflipsites,sizeof(long),longcmp);
464 break;
465 }
466 free(fline);
467 }
468
469
470 } /* read_flipfile */
471
472
473 /*******************************************************************
474 * pruneflips() removes "uninteresting" entries from the flippable *
475 * list. It also "verifies" the marker aliasing for likelihood. *
476 * *
477 * "uninteresting" = invariant */
pruneflips(option_struct * op,data_fmt * data,tree * tr)478 void pruneflips(option_struct *op, data_fmt *data, tree *tr)
479 {
480 long i, whichcreature, numcreatures, site, nummarkers, *fmarkers, temp,
481 *siteptr;
482 creature *cr;
483 boolean noflips;
484
485 numcreatures = getdata_numtips(op,data)/NUMHAPLOTYPES;
486 nummarkers = getdata_nummarkers(op,data);
487 siteptr = data->siteptr;
488 fmarkers = (long *)calloc(nummarkers,sizeof(long));
489
490 for(whichcreature = 0; whichcreature < numcreatures; whichcreature++) {
491 cr = &(tr->creatures[whichcreature]);
492 for(site = cr->numflipsites-1; site > -1; site--) {
493 if (isinvariant(op,data,cr->flipsites[site],cr)) {
494 cr->numflipsites--;
495 for(i = site; i < cr->numflipsites; i++) {
496 cr->flipsites[i] = cr->flipsites[i+1];
497 }
498 } else fmarkers[cr->flipsites[site]] = 1;
499 }
500 }
501
502 /* now to disable datalikelihood aliasing on all remaining flippable
503 markers-if removed then fliphap must do a complete redo of siteptr
504 array, not just rebuild_alias() */
505 for(site = 0; site < nummarkers; site++) {
506 if (!fmarkers[site]) continue;
507
508 temp = siteptr[site];
509 siteptr[site] = 0;
510 for(i = site+1; i < nummarkers; i++) {
511 if (siteptr[i] == site+1) {
512 siteptr[i] = temp;
513 break;
514 }
515 }
516 }
517
518 free(fmarkers);
519
520 noflips = TRUE;
521 for(whichcreature = 0; whichcreature < numcreatures; whichcreature++) {
522 cr = &(tr->creatures[whichcreature]);
523 if (cr->numflipsites != 0) noflips = FALSE;
524 }
525 if (noflips) {
526 fprintf(ERRFILE,"\n\nAll sites specified as phase-unknown are ");
527 fprintf(ERRFILE,"invariant, probable error in specifying flipfile.\n\n");
528 exit(-1);
529 }
530
531 } /* pruneflips */
532
533
534 /*********************************************************
535 * read_indname() reads the individual names of the data */
read_indname(FILE * file,data_fmt * data,long pop,long lowcus,long ind,long nmlength,char datatype)536 void read_indname(FILE *file, data_fmt *data, long pop, long lowcus,
537 long ind, long nmlength, char datatype)
538 {
539 long i=0;
540
541 switch(datatype) {
542 case 'a':
543 break;
544 case 'b':
545 case 'm':
546 while(i<nmlength)
547 data->msptr->indnames[pop][lowcus][ind][i++]=getc(file);
548 data->msptr->indnames[pop][lowcus][ind][nmlength]='\0';
549 break;
550 case 'n':
551 case 's':
552 while(i<nmlength)
553 data->dnaptr->indnames[pop][lowcus][ind][i++]=getc(file);
554 data->dnaptr->indnames[pop][lowcus][ind][nmlength]='\0';
555 break;
556 default:
557 fprintf(ERRFILE,"read_indname, can't get here!\n");
558 break;
559 }
560
561 } /* read_indname */
562
563
564 /******************************************************************
565 * read_microalleles() actually reads in the microsattellite data */
read_microalleles(FILE * infile,msatdata * data,long pop,long ind)566 void read_microalleles(FILE *infile, msatdata *data, long pop, long ind)
567 {
568 char *input, dlm[2],ddlm[2], *a, *a1, *a2;
569 long lowcus,i;
570
571 input = (char *) calloc(1,sizeof(char)*(LINESIZE+1));
572 a = (char *) calloc(1,sizeof(char)*LINESIZE);
573 a1 = (char *) calloc(1,sizeof(char)*LINESIZE);
574 a2 = (char *) calloc(1,sizeof(char)*LINESIZE);
575 dlm[0]=data->dlm, dlm[1]='\0';
576 ddlm[0]=' ', ddlm[1]='\0';
577
578 read_line(infile,input);
579
580 for(lowcus=0; lowcus<data->numloci; lowcus++) {
581 if(input[0]=='\0') read_line(infile,input);
582 while(isspace((int) *input)) input++;
583 i=0;
584 while(input[i]!=' ' && input[i]!=dlm[0]) {
585 a1[i]=input[i];
586 i++;
587 }
588 a1[i]='\0';
589 input += i;
590 i=0;
591 if(input[i]==dlm[0]){
592 input++;
593 while(input[i]!=' ' && input[i]!='\0') {
594 a2[i]=input[i];
595 i++;
596 }
597 a2[i]='\0';
598 if(a2[0]=='\0') strcpy(a2,a1);
599 input += i;
600 } else strcpy(a2,a1);
601 data->msats[pop][ind][lowcus][0] = atol(a1);
602 data->msats[pop][ind][lowcus][1] = atol(a2);
603 }
604
605 free(a);
606 free(a1);
607 free(a2);
608 free(input);
609
610 } /* read_microalleles */
611
612
613 /*************************************************************
614 * read_ind_seq() reads in the first line of a dna sequence, *
615 * subsequent lines will be read by finish_read_seq() */
read_ind_seq(FILE * infile,dnadata * data,option_struct * op,long lowcus,long pop,long ind,long baseread)616 long read_ind_seq(FILE *infile, dnadata *data, option_struct *op,
617 long lowcus, long pop, long ind, long baseread)
618 {
619 long j;
620 char charstate;
621
622 j = (op->interleaved) ? baseread : 0;
623 charstate = getc(infile);
624 ungetc((int) charstate,infile);
625 while (j < data->sites[lowcus] &&
626 !(op->interleaved && charstate=='\n')) {
627 charstate = getc(infile);
628 if (charstate == '\n') {
629 if(op->interleaved) return j;
630 else charstate=' ';
631 }
632 if (isspace(charstate) || isdigit(charstate))
633 continue;
634 charstate = uppercase(charstate);
635 if ((strchr("ABCDGHKMNRSTUVWXY?O-",(int) charstate)) == NULL){
636 fprintf(ERRFILE,"ERROR: BAD BASE: %c AT POSITION %5ld OF",
637 charstate,j);
638 fprintf(ERRFILE," INDIVIDUAL %3li in POPULATION %ld\n",ind,pop);
639 exit(-1);
640 }
641 data->seqs[pop][lowcus][ind][j++] = charstate;
642 }
643
644 charstate = getc(infile); /* swallow the \n */
645 return j;
646
647 } /* read_ind_seq */
648
649
650 /***************************************************************
651 * finish_read_seq() finishes reading in the dna data begun by *
652 * read_ind_seq(). */
finish_read_seq(FILE * infile,data_fmt * data,option_struct * op,long pop,long baseread)653 void finish_read_seq(FILE *infile, data_fmt *data, option_struct *op,
654 long pop, long baseread)
655 {
656
657 long ind, baseread2=0, lowcus=0;
658
659 if(op->interleaved){
660 while(baseread<data->dnaptr->sites[0]) {
661 for(ind=0; ind < data->dnaptr->numseq[pop]; ind++) {
662 baseread2 =
663 read_ind_seq(infile,data->dnaptr,op,lowcus,pop,ind,baseread);
664 }
665 baseread = baseread2;
666 }
667 }
668
669 for(lowcus = 1; lowcus < getdata_numloci(op,data); lowcus++){
670 baseread=0;
671 /* "population" doesn't exist at this point so... */
672 for(ind=0; ind < data->dnaptr->numseq[pop]; ind++) {
673 read_indname(infile,data,pop,lowcus,ind,NMLNGTH,op->datatype);
674 baseread =
675 read_ind_seq(infile,data->dnaptr,op,lowcus,pop,ind,0);
676 }
677 if(op->interleaved){
678 while(baseread<data->dnaptr->sites[lowcus]){
679 for (ind=0; ind < data->dnaptr->numseq[pop]; ind++) {
680 baseread2 =
681 read_ind_seq(infile,data->dnaptr,op,lowcus,pop,ind,baseread);
682 }
683 baseread=baseread2;
684 }
685 }
686 }
687
688 } /* finish_read_seq */
689
690
691 /*****************************************************
692 * read_popdata() reads in the data from file INFILE */
read_popdata(FILE * infile,data_fmt * data,long pop,option_struct * op)693 void read_popdata(FILE *infile, data_fmt *data, long pop,
694 option_struct *op)
695 {
696 long ind, baseread=0, lowcus=0, numentries;
697
698 /* initialization for lint */
699 numentries = 0;
700
701 switch(op->datatype) {
702 case 'a':
703 break;
704 case 'b':
705 case 'm':
706 numentries = data->msptr->numind[pop];
707 break;
708 case 'n':
709 case 's':
710 numentries = data->dnaptr->numseq[pop];
711 break;
712 default:
713 fprintf(ERRFILE,"read_popdata: can't be here!\n");
714 exit(-1);
715 }
716
717 for(ind=0; ind < numentries; ind++) {
718 read_indname(infile,data,pop,lowcus,ind,NMLNGTH,op->datatype);
719 switch(op->datatype) {
720 case 'a':
721 /* read_alleles(infile, data,pop, ind); */
722 break;
723 case 'b':
724 case 'm':
725 /* if (data->dlm == '\0') read_alleles(infile, data,pop, ind);
726 else */ read_microalleles(infile,data->msptr,pop,ind);
727 break;
728 case 'n':
729 case 's':
730 baseread=read_ind_seq(infile,data->dnaptr,op,lowcus,pop,ind,0L);
731 break;
732 default:
733 fprintf(stderr,"Wrong datatype, only the types a, m, s");
734 fprintf(stderr," (electrophoretic alleles, \nmicrosatellite data,");
735 fprintf(stderr,"sequence data) are allowed.\n");
736 exit(-1);
737 break;
738 }
739 }
740 if(op->datatype=='s' || op->datatype=='n')
741 finish_read_seq(infile,data,op,pop,baseread);
742
743 } /* read_popdata */
744
745
746 /***********************************************************************
747 * setpopstuff() sets the data structure's population specific fields: *
748 * # of individuals per population, population title. */
setpopstuff(data_fmt * data,long pop,long numind,char * poptitle,char datatype)749 void setpopstuff(data_fmt *data, long pop, long numind, char *poptitle,
750 char datatype)
751 {
752
753 switch(datatype) {
754 case 'a':
755 break;
756 case 'b':
757 case 'm':
758 data->msptr->numind[pop] = numind;
759 strcpy(data->msptr->popnames[pop],poptitle);
760 break;
761 case 'n':
762 case 's':
763 data->dnaptr->numseq[pop] = numind;
764 strcpy(data->dnaptr->popnames[pop],poptitle);
765 break;
766 default:
767 fprintf(ERRFILE,"setpopstuff, found an unknown datatype %c\n",
768 datatype);
769 exit(-1);
770 }
771
772 } /* setpopstuff */
773
774
775 /*****************************************************
776 * setupdnadata() initializes the dna data structure */
setupdnadata(dnadata ** dna,long * numsites,long numseq,long numloci,long numpop)777 void setupdnadata(dnadata **dna, long *numsites, long numseq,
778 long numloci, long numpop)
779 {
780 long i, j, k, maxsites=0;
781
782 (*dna) = (dnadata *)calloc(1,sizeof(dnadata));
783
784 (*dna)->title = (char *)calloc(LINESIZE,sizeof(char));
785 (*dna)->popnames = (char **)calloc(numpop,sizeof(char *));
786 (*dna)->popnames[0] = (char *)calloc(numpop*LINESIZE,sizeof(char));
787 for(i = 1; i < numpop; i++)
788 (*dna)->popnames[i] = (*dna)->popnames[0] + i*LINESIZE;
789
790 (*dna)->numpop = numpop;
791 (*dna)->numloci = numloci;
792
793 (*dna)->numseq = (long *)calloc(numpop,sizeof(long));
794 /* This is reset for all populations, except the first,
795 by setpopstuff() */
796 for(i = 0; i < numpop; i++) (*dna)->numseq[i] = numseq;
797
798 (*dna)->sites = (long *)calloc(numloci,sizeof(long));
799 (*dna)->sitecount = (long **)calloc(numloci,sizeof(long *));
800 (*dna)->markersite = (long **)calloc(numloci,sizeof(long *));
801 for(i = 0; i < numloci; i++) {
802 (*dna)->sites[i] = numsites[i];
803 if (numsites[i] > maxsites) maxsites = numsites[i];
804 }
805 (*dna)->sitecount[0] = (long *)calloc(numloci*maxsites,sizeof(long));
806 (*dna)->markersite[0] = (long *)calloc(numloci*maxsites,sizeof(long));
807 for(i = 1; i < numloci; i++) {
808 (*dna)->sitecount[i] = (*dna)->sitecount[0] + i*maxsites;
809 (*dna)->markersite[i] = (*dna)->markersite[0] + i*maxsites;
810 }
811
812 (*dna)->seqs = (char ****)calloc(numpop,sizeof(char ***));
813 (*dna)->indnames = (char ****)calloc(numpop,sizeof(char ***));
814 (*dna)->sspace = (double ***)calloc(numpop,sizeof(double **));
815
816 (*dna)->seqs[0] = (char ***)calloc(numpop*numloci,sizeof(char **));
817 (*dna)->indnames[0] = (char ***)calloc(numpop*numloci,sizeof(char **));
818 (*dna)->sspace[0] = (double **)calloc(numpop*numloci,sizeof(double *));
819 for(i = 1; i < numpop; i++) {
820 (*dna)->seqs[i] = (*dna)->seqs[0] + i*numloci;
821 (*dna)->indnames[i] = (*dna)->indnames[0] + i*numloci;
822 (*dna)->sspace[i] = (*dna)->sspace[0] + i*numloci;
823 }
824
825 (*dna)->seqs[0][0] =
826 (char **)calloc(numpop*numloci*numseq,sizeof(char *));
827 (*dna)->indnames[0][0] =
828 (char **)calloc(numpop*numloci*numseq,sizeof(char *));
829 /* use maxsites+1 in allocating sspace to include spacing before
830 the first marker */
831 (*dna)->sspace[0][0] =
832 (double *)calloc(numpop*numloci*(maxsites+1),sizeof(double));
833 for(i = 0; i < numpop; i++)
834 for(j = 0; j < numloci; j++) {
835 (*dna)->seqs[i][j] = (*dna)->seqs[0][0] + i*numloci*numseq
836 + j*numseq;
837 (*dna)->indnames[i][j] = (*dna)->indnames[0][0] + i*numloci*numseq
838 + j*numseq;
839 (*dna)->sspace[i][j] = (*dna)->sspace[0][0] + i*numloci*(maxsites+1)
840 + j*(maxsites+1);
841 }
842
843 for(i = 0; i < numpop; i++)
844 for(j = 0; j < numloci; j++) {
845 for(k = 0; k < maxsites; k++)
846 (*dna)->sspace[i][j][k] = 1.0;
847 /* initialize the most leftward space entry to zero, no additional sites
848 before the first marker. */
849 (*dna)->sspace[i][j][(*dna)->sites[i]] = 0.0;
850 }
851
852 (*dna)->seqs[0][0][0] =
853 (char *)calloc(numpop*numloci*numseq*maxsites,sizeof(char));
854 (*dna)->indnames[0][0][0] =
855 (char *)calloc(numpop*numloci*numseq*(NMLNGTH+1),sizeof(char));
856 for(i = 0; i < numpop; i++)
857 for(j = 0; j < numloci; j++)
858 for(k = 0; k < numseq; k++) {
859 (*dna)->seqs[i][j][k] = (*dna)->seqs[0][0][0]
860 + i*numloci*numseq*maxsites + j*numseq*maxsites + k*maxsites;
861 (*dna)->indnames[i][j][k] = (*dna)->indnames[0][0][0]
862 + i*numloci*numseq*(NMLNGTH+1) + j*numseq*(NMLNGTH+1)
863 + k*(NMLNGTH+1);
864 }
865
866 (*dna)->dnaweight = (long *)calloc(maxsites,sizeof(long));
867 for(i = 0; i < maxsites; i++) (*dna)->dnaweight[i] = 1;
868
869 (*dna)->segranges0 = NULL;
870 (*dna)->segranges1 = NULL;
871 (*dna)->segranges2 = NULL;
872 (*dna)->segranges3 = NULL;
873
874 } /* setupdnadata */
875
876
877 /**********************************************
878 * freednadata() frees the dna data structure */
freednadata(dnadata * dna)879 void freednadata(dnadata *dna)
880 {
881 free(dna->popnames[0]);
882 free(dna->popnames);
883 free(dna->title);
884 free(dna->dnaweight);
885 free(dna->numseq);
886 free(dna->sites);
887 free(dna->sspace[0]);
888 free(dna->sspace);
889 free(dna->sitecount[0]);
890 free(dna->sitecount);
891 free(dna->markersite[0]);
892 free(dna->markersite);
893 free(dna->indnames[0][0][0]);
894 free(dna->indnames[0][0]);
895 free(dna->indnames[0]);
896 free(dna->indnames);
897 free(dna->seqs[0][0][0]);
898 free(dna->seqs[0][0]);
899 free(dna->seqs[0]);
900 free(dna->seqs);
901 free(dna->segranges0);
902 free(dna->segranges1);
903 free(dna->segranges2);
904 free(dna->segranges3);
905 free(dna);
906 } /* freednadata */
907
908
909 /*******************************************************************
910 * printdnadata() prints the dna sequences contained in the passed *
911 * data structure to the passed file in sequential form. */
printdnadata(option_struct * op,data_fmt * data,FILE * out)912 void printdnadata(option_struct *op, data_fmt *data, FILE *out)
913 {
914 long pop, numpop, loc, numloci, ind, numind, site, numsite, siteout;
915 dnadata *dna;
916
917 /* can't use standard functions to set all num* due to use of global
918 "population" */
919 dna = data->dnaptr;
920 numpop = getdata_numpop(op,data);
921 for(pop = 0; pop < numpop; pop++) {
922 fprintf(out,"----------------------------\n");
923 fprintf(out,"Sequences for Population %ld\n",pop+1);
924 fprintf(out,"----------------------------\n");
925 numloci = getdata_numloci(op,data);
926 for(loc = 0; loc < numloci; loc++) {
927 fprintf(out,"Locus %ld -------------------\n",loc+1);
928 numind = dna->numseq[pop];
929 for(ind = 0; ind < numind; ind++) {
930 fprintf(out,"%s",dna->indnames[pop][loc][ind]);
931 numsite = dna->sites[loc];
932 for(site = 0, siteout = NMLNGTH; site < numsite; site++) {
933 fprintf(out,"%c",dna->seqs[pop][loc][ind][site]);
934 siteout++;
935 if (siteout == OUTLINESIZE) {fprintf(out,"\n"); siteout = 0;}
936 }
937 fprintf(out,"\n");
938 }
939 }
940 fprintf(out,"\n");
941 }
942 fprintf(out,"\n\n");
943
944 } /* printdnadata */
945
946
947 /********************************************************************
948 * initinvartips() sets up the special fractional tips for SNP data */
initinvartips(option_struct * op,data_fmt * data,long categs,tree * curtree)949 void initinvartips(option_struct *op, data_fmt *data, long categs,
950 tree *curtree)
951 {
952 long i, j, k, m, pos, n, numseq, numtips;
953
954 pos = getdata_nummarkers(op,data);
955 numseq = getdata_numseq(op,data);
956 numtips = getdata_numtips(op,data);
957
958 if (op->panel) {
959 /* real tips */
960 for(i = 1; i <= numseq; i++) {
961 for (m = pos; m < pos+NUMINVAR; m++) {
962 for(j=0; j < categs; j++) {
963 for (n = 0; n < NUMSLICE; n++) {
964 for (k = baseA; k <= baseT; k++)
965 curtree->nodep[i]->x->s[m][j][n][k] = 1.0;
966 }
967 }
968 }
969 }
970 /* fake tips */
971 for (i=numseq+1; i <= numtips; i++) {
972 for (m = 0; m < pos+NUMINVAR; m++) {
973 for(j = 0; j < categs; j++) {
974 for(k = baseA; k <= baseT; k++) {
975 curtree->nodep[i]->x->s[m][j][0][k] = 1.0;
976 for(n = 1; n < NUMSLICE; n++)
977 curtree->nodep[i]->x->s[m][j][n][k] = 0.0;
978 }
979 curtree->nodep[i]->x->s[m][j][1][baseA] = 1.0;
980 curtree->nodep[i]->x->s[m][j][2][baseC] = 1.0;
981 curtree->nodep[i]->x->s[m][j][3][baseG] = 1.0;
982 curtree->nodep[i]->x->s[m][j][4][baseT] = 1.0;
983 }
984 }
985 }
986 } else {
987 for(i = 1; i <= numtips; i++) {
988 for(j = 0; j < categs; j++) {
989 for (m = pos; m < pos+NUMINVAR; m++) {
990 for(k = baseA; k <= baseT; k++) {
991 curtree->nodep[i]->x->s[m][j][0][k] = 0.0;
992 }
993 }
994 curtree->nodep[i]->x->s[pos][j][0][baseA] = 1.0;
995 curtree->nodep[i]->x->s[pos+1][j][0][baseC] = 1.0;
996 curtree->nodep[i]->x->s[pos+2][j][0][baseG] = 1.0;
997 curtree->nodep[i]->x->s[pos+3][j][0][baseT] = 1.0;
998 }
999 }
1000 }
1001
1002 } /* initinvartips */
1003
1004
1005 /*************************************************************
1006 * makednavalues() set up fractional likelihoods at the tips */
makednavalues(option_struct * op,data_fmt * data,long categs,tree * curtree)1007 void makednavalues(option_struct *op, data_fmt *data, long categs,
1008 tree *curtree)
1009 {
1010 long i, k, l, m, numslice;
1011 long b;
1012 char ****y;
1013
1014 y = data->dnaptr->seqs;
1015 numslice = (op->panel) ? NUMSLICE : 1L;
1016
1017 for (k = 0; k < getdata_nummarkers(op,data); k++) {
1018 /* real tips are 1 to numseq */
1019 for (i = 1; i <= getdata_numseq(op,data); i++) {
1020 strcpy(curtree->nodep[i]->nayme,
1021 data->dnaptr->indnames[population][locus][i-1]);
1022 for (l = 0; l < categs; l++) {
1023 for (m = 0; m < numslice; m++) {
1024 for (b = baseA; b <= baseT; b = b + 1)
1025 curtree->nodep[i]->x->s[k][l][m][b] = 0.0;
1026
1027 switch (y[population][locus][i-1][k]) {
1028
1029 case 'A':
1030 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1031 break;
1032
1033 case 'C':
1034 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1035 break;
1036
1037 case 'G':
1038 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1039 break;
1040
1041 case 'T':
1042 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1043 break;
1044
1045 case 'U':
1046 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1047 break;
1048
1049 case 'M':
1050 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1051 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1052 break;
1053
1054 case 'R':
1055 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1056 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1057 break;
1058
1059 case 'W':
1060 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1061 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1062 break;
1063
1064 case 'S':
1065 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1066 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1067 break;
1068
1069 case 'Y':
1070 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1071 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1072 break;
1073
1074 case 'K':
1075 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1076 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1077 break;
1078
1079 case 'B':
1080 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1081 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1082 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1083 break;
1084
1085 case 'D':
1086 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1087 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1088 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1089 break;
1090
1091 case 'H':
1092 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1093 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1094 curtree->nodep[i]->x->s[k][l][m][baseT] = 1.0;
1095 break;
1096
1097 case 'V':
1098 curtree->nodep[i]->x->s[k][l][m][baseA] = 1.0;
1099 curtree->nodep[i]->x->s[k][l][m][baseC] = 1.0;
1100 curtree->nodep[i]->x->s[k][l][m][baseG] = 1.0;
1101 break;
1102
1103 case 'N':
1104 for (b = baseA; b <= baseT; b++)
1105 curtree->nodep[i]->x->s[k][l][m][b] = 1.0;
1106 break;
1107
1108 case 'X':
1109 for (b = baseA; b <= baseT; b++)
1110 curtree->nodep[i]->x->s[k][l][m][b] = 1.0;
1111 break;
1112
1113 case '?':
1114 for (b = baseA; b <= baseT; b++)
1115 curtree->nodep[i]->x->s[k][l][m][b] = 1.0;
1116 break;
1117
1118 case 'O':
1119 for (b = baseA; b <= baseT; b++)
1120 curtree->nodep[i]->x->s[k][l][m][b] = 1.0;
1121 break;
1122
1123 case '-':
1124 for (b = baseA; b <= baseT; b++)
1125 curtree->nodep[i]->x->s[k][l][m][b] = 1.0;
1126 break;
1127
1128 default:
1129 fprintf(ERRFILE,"**ERROR in setting up tips, probable error");
1130 fprintf(ERRFILE," in input data file.\nInterleaved/sequential");
1131 fprintf(ERRFILE," status may be incorrectly set.\n\n");
1132 exit(-1);
1133 break;
1134 } /* sorry about the indenting */
1135 }
1136 }
1137 }
1138 }
1139 } /* makednavalues */
1140
1141
1142 /*************************************************************************
1143 * empiricaldnafreqs calculates empirical base frequencies from the data */
empiricaldnafreqs(option_struct * op,data_fmt * data,tree * curtree)1144 void empiricaldnafreqs(option_struct *op, data_fmt *data, tree *curtree)
1145 {
1146 long i, j;
1147 double temp, suma, sumc, sumg, sumt, w;
1148 dnadata *dna;
1149
1150 dna = data->dnaptr;
1151
1152 dna->freqa = 0.25;
1153 dna->freqc = 0.25;
1154 dna->freqg = 0.25;
1155 dna->freqt = 0.25;
1156 suma = 0.0;
1157 sumc = 0.0;
1158 sumg = 0.0;
1159 sumt = 0.0;
1160 for (i = 1; i <= getdata_numseq(op,data); i++) {
1161 for (j = 0; j < getdata_nummarkers(op,data); j++) {
1162 w = dna->dnaweight[j];
1163 temp = dna->freqa * curtree->nodep[i]->x->s[j][0][0][baseA];
1164 temp += dna->freqc * curtree->nodep[i]->x->s[j][0][0][baseC];
1165 temp += dna->freqg * curtree->nodep[i]->x->s[j][0][0][baseG];
1166 temp += dna->freqt * curtree->nodep[i]->x->s[j][0][0][baseT];
1167 suma += w * dna->freqa * curtree->nodep[i]->x->s[j][0][0][baseA] / temp;
1168 sumc += w * dna->freqc * curtree->nodep[i]->x->s[j][0][0][baseC] / temp;
1169 sumg += w * dna->freqg * curtree->nodep[i]->x->s[j][0][0][baseG] / temp;
1170 sumt += w * dna->freqt * curtree->nodep[i]->x->s[j][0][0][baseT] / temp;
1171 }
1172 }
1173 temp = suma + sumc + sumg + sumt;
1174 dna->freqa = suma / temp;
1175 dna->freqc = sumc / temp;
1176 dna->freqg = sumg / temp;
1177 dna->freqt = sumt / temp;
1178
1179 } /* empiricaldnafreqs */
1180
1181
getbasednafreqs(dnadata * dna,option_struct * op,double locus_ttratio,FILE * outfile)1182 void getbasednafreqs(dnadata *dna, option_struct *op, double locus_ttratio,
1183 FILE *outfile)
1184 {
1185 double aa, bb;
1186
1187 putc('\n', outfile);
1188 if (op->freqsfrom)
1189 fprintf(outfile, "Empirical ");
1190 fprintf(outfile, "Base Frequencies:\n\n");
1191 fprintf(outfile, " A %10.5f\n", dna->freqa);
1192 fprintf(outfile, " C %10.5f\n", dna->freqc);
1193 fprintf(outfile, " G %10.5f\n", dna->freqg);
1194 fprintf(outfile, " T(U) %10.5f\n", dna->freqt);
1195 dna->freqr = dna->freqa + dna->freqg;
1196 dna->freqy = dna->freqc + dna->freqt;
1197 dna->freqar = dna->freqa / dna->freqr;
1198 dna->freqcy = dna->freqc / dna->freqy;
1199 dna->freqgr = dna->freqg / dna->freqr;
1200 dna->freqty = dna->freqt / dna->freqy;
1201 fprintf(outfile, "Transition/transversion ratio = %10.6f\n", locus_ttratio);
1202 aa = locus_ttratio * dna->freqr * dna->freqy -
1203 dna->freqa * dna->freqg - dna->freqc * dna->freqt;
1204 bb = dna->freqa * dna->freqgr + dna->freqc * dna->freqty;
1205 dna->xi = aa / (aa + bb);
1206 dna->xv = 1.0 - dna->xi;
1207 dna->ttratio = dna->xi / dna->xv;
1208 if (dna->xi <= 0.0) {
1209 printf("WARNING: This transition/transversion ratio\n");
1210 printf("is impossible with these base frequencies!\n");
1211 dna->xi = 3.0 / 5;
1212 dna->xv = 2.0 / 5;
1213 fprintf(outfile, " Transition/transversion parameter reset\n\n");
1214 }
1215 fprintf(outfile, "(Transition/transversion parameter = %10.6f)\n",
1216 dna->xi / dna->xv);
1217 dna->fracchange = dna->xi * (2 * dna->freqa * dna->freqgr +
1218 2 * dna->freqc * dna->freqty) +
1219 dna->xv * (1.0 - dna->freqa * dna->freqa -
1220 dna->freqc * dna->freqc - dna->freqg * dna->freqg -
1221 dna->freqt * dna->freqt);
1222 } /* getbasednafreqs */
1223
1224
uppercase(char ch)1225 char uppercase(char ch)
1226 {
1227 return((islower((int)ch) ? (char)toupper((int)ch) : (ch)));
1228
1229 } /* uppercase */
1230
1231
1232 /******************************************************************
1233 * inputdnaweights() reads the sitewise weights for the dna data. *
1234 * 0-9 and A-Z for weights 0-35. */
inputdnaweights(long numchars,dnadata * dna,option_struct * op)1235 void inputdnaweights(long numchars, dnadata *dna, option_struct *op)
1236 {
1237 char ch;
1238 long i;
1239
1240 for (i = 0; i < numchars; i++) {
1241 do {
1242 if (eoln(weightfile)) {
1243 fscanf(weightfile, "%*[^\n]");
1244 getc(weightfile);
1245 }
1246 ch = getc(weightfile);
1247 if (ch == '\n') ch = ' ';
1248 } while (ch == ' ');
1249
1250 dna->dnaweight[i] = 1;
1251 if (isdigit((int)ch))
1252 dna->dnaweight[i] = (long)(ch - '0');
1253 else if (isalpha((int)ch)) {
1254 ch = (char)uppercase(ch);
1255 dna->dnaweight[i] = (long)(ch - 'A' + 10);
1256 } else {
1257 printf("BAD WEIGHT CHARACTER: %c\n", ch);
1258 exit(-1);
1259 }
1260 }
1261
1262 fscanf(weightfile, "%*[^\n]");
1263 getc(weightfile);
1264 op->weights = TRUE;
1265 fclose(weightfile);
1266
1267 } /* inputweights */
1268
1269
1270 /************************************************
1271 * hapdist computes a distance between two sets *
1272 * of haplotype resolutions. */
hapdist(option_struct * op,data_fmt * hap1,data_fmt * hap2,creature * critter)1273 long hapdist(option_struct *op,data_fmt *hap1, data_fmt *hap2, creature *critter)
1274 {
1275 long mkr, cr, numcreatures, markers;
1276 long score[2];
1277 long fullscore;
1278
1279 numcreatures = getdata_numtips(op,hap1)/NUMHAPLOTYPES;
1280 markers = getdata_nummarkers(op,hap1);
1281
1282 fullscore = 0;
1283 for (cr=0; cr<numcreatures; cr++) {
1284 score[0] = 0;
1285 score[1] = 0;
1286 for (mkr=0; mkr < markers; mkr++) {
1287 /* matching the two 0 and the two 1 haplotypes */
1288 if (hap1->dnaptr->seqs[population][locus]
1289 [critter[cr].haplotypes[0]->number-1][mkr] !=
1290 hap2->dnaptr->seqs[population][locus]
1291 [critter[cr].haplotypes[0]->number-1][mkr])
1292 score[0]++;
1293 if (hap1->dnaptr->seqs[population][locus]
1294 [critter[cr].haplotypes[1]->number-1][mkr] !=
1295 hap2->dnaptr->seqs[population][locus]
1296 [critter[cr].haplotypes[1]->number-1][mkr])
1297 score[0]++;
1298 /* matching 0-1 and 1-0 haplotypes */
1299 if (hap1->dnaptr->seqs[population][locus]
1300 [critter[cr].haplotypes[0]->number-1][mkr] !=
1301 hap2->dnaptr->seqs[population][locus]
1302 [critter[cr].haplotypes[1]->number-1][mkr])
1303 score[1]++;
1304 if (hap1->dnaptr->seqs[population][locus]
1305 [critter[cr].haplotypes[1]->number-1][mkr] !=
1306 hap2->dnaptr->seqs[population][locus]
1307 [critter[cr].haplotypes[0]->number-1][mkr])
1308 score[1]++;
1309 }
1310 if (score[0]<score[1]) fullscore += score[0];
1311 else fullscore += score[1];
1312 }
1313 return(fullscore);
1314
1315 } /* hapdist */
1316
1317 /********************************************************
1318 * This reads in the "true" haplotypes for the purposes *
1319 * of comparing them with derived ones. Input file is *
1320 * truehaps. */
readtruehaps(option_struct * op,data_fmt * hap,long numpop,long numloci,long numind,long * numsites)1321 void readtruehaps(option_struct *op, data_fmt *hap, long numpop, long
1322 numloci, long numind, long *numsites)
1323 {
1324 char *s;
1325 FILE *truehaps;
1326
1327 s = (char *)calloc(LINESIZE,sizeof(char));
1328
1329 truehaps = fopen("truehaps","r");
1330 /* read off first 3 lines of standard input file as unneeded */
1331 read_line(truehaps,s); read_line(truehaps,s); read_line(truehaps,s);
1332
1333 setupdata(hap,op->datatype,numpop,numloci,numind,numsites,"True Haplotypes");
1334 setpopstuff(hap,0L,numind,"poptitle",op->datatype);
1335 read_popdata(truehaps,hap,0L,op);
1336
1337 free(s);
1338
1339 } /* readtruehaps */
1340
1341 /********************************************************
1342 * This copies a set of haplotypes into a new data *
1343 * structure which it allocates. */
copyhaps(option_struct * op,data_fmt * oldhap,data_fmt * newhap,long numpop,long numloci,long numind,long * numsites)1344 void copyhaps(option_struct *op, data_fmt *oldhap, data_fmt *newhap,
1345 long numpop, long numloci, long numind, long *numsites)
1346 {
1347 long pop, seq, site, loc;
1348
1349 setupdata(newhap,op->datatype,numpop, numloci, numind,
1350 numsites, "Copy Haplotypes");
1351 setpopstuff(newhap,0L,numind,"poptitle",op->datatype);
1352
1353
1354 for (pop = 0; pop < numpop; pop++) {
1355 for (loc = 0; loc < numloci; loc++) {
1356 for (seq = 0; seq < numind; seq++) {
1357 for (site = 0; site < numsites[locus]; site++) {
1358 newhap->dnaptr->seqs[pop][locus][seq][site] =
1359 oldhap->dnaptr->seqs[pop][locus][seq][site];
1360 }
1361 }
1362 }
1363 }
1364
1365 } /* copyhaps */
1366