1 /*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "PedigreeDescription.h"
19 #include "MapFunction.h"
20 #include "MathVector.h"
21 #include "Constant.h"
22 #include "FortranFormat.h"
23 #include "Error.h"
24
25 #include <stdlib.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include <math.h>
29
PedigreeDescription()30 PedigreeDescription::PedigreeDescription()
31 {
32 columnCount = 0;
33 mendelFormat = false;
34 }
35
~PedigreeDescription()36 PedigreeDescription::~PedigreeDescription()
37 { };
38
operator =(PedigreeDescription & rhs)39 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
40 {
41 columnCount = rhs.columnCount;
42
43 columns = rhs.columns;
44 columnHash = rhs.columnHash;
45
46 return *this;
47 };
48
Load(IFILE & input,bool warnIfLinkage)49 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
50 {
51 // Check if we are dealing with a linkage format data file
52 String buffer;
53 StringArray tokens;
54
55 mendelFormat = false;
56
57 ReadLineHelper(input, buffer, tokens);
58 ifrewind(input);
59
60 if (tokens.Length() == 4 && isdigit(tokens[0][0]))
61 {
62 if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
63 LoadLinkageDataFile(input);
64 return;
65 }
66
67 if (buffer.Length() > 18
68 && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
69 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
70 && (isdigit(buffer[16]) || isdigit(buffer[17]))
71 && (isdigit(buffer[18]) || isdigit(buffer[19]) ||
72 (buffer.Length() > 19 && isdigit(buffer[20]))))
73 {
74 printf("Data file looks like a MENDEL format file...\n"
75 " Activating EXPERIMENTAL support for this format\n\n");
76 LoadMendelDataFile(input);
77 return;
78 }
79
80 // Reset things
81 ifrewind(input);
82 int done = 0;
83 int line = 0;
84
85 columns.Clear();
86 columnHash.Clear();
87 columnCount = 0;
88
89 while (!ifeof(input) && !done)
90 {
91 int i;
92
93 buffer.ReadLine(input);
94 line++;
95
96 tokens.Clear();
97 tokens.AddTokens(buffer, WHITESPACE);
98
99 if (tokens.Length() < 1) continue;
100
101 if (tokens.Length() == 1)
102 error("Problem reading data file:\n"
103 "Item #%d (of type %s) has no name.",
104 columnCount+1, (const char *) tokens[0]);
105
106 switch (toupper(tokens[0][0]))
107 {
108 case 'A' :
109 columnHash.Push(GetAffectionID(tokens[1]));
110 columns.Push(pcAffection);
111 columnCount++;
112 break;
113 case 'M' :
114 columnHash.Push(GetMarkerID(tokens[1]));
115 columns.Push(pcMarker);
116 columnCount++;
117 break;
118 case 'T' :
119 columnHash.Push(GetTraitID(tokens[1]));
120 columns.Push(pcTrait);
121 columnCount++;
122 break;
123 case 'C' :
124 columnHash.Push(GetCovariateID(tokens[1]));
125 columns.Push(pcCovariate);
126 columnCount++;
127 break;
128 case '$' :
129 columnHash.Push(GetStringID(tokens[1]));
130 columns.Push(pcString);
131 columnCount++;
132 break;
133 case 'S' :
134 i = (int) tokens[0].SubStr(1);
135 i = i > 0 ? i : 1;
136 while (i--)
137 {
138 columns.Push(pcSkip);
139 columnHash.Push(0);
140 columnCount++;
141 }
142 break;
143 case 'Z' :
144 columnHash.Push(0);
145 columns.Push(pcZygosity);
146 columnCount++;
147 break;
148 case 'V' :
149 GetMarkerID(tokens[1]);
150 break;
151 case 'E' :
152 done = 1;
153 break;
154 case 'U' :
155 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
156 {
157 int c = GetCovariateID(tokens[1]);
158 int t = GetTraitID(tokens[1]);
159
160 if (c >= 32767 || t >= 32767)
161 error("Internal error processing data file\n");
162
163 columnHash.Push(t * 32768 + c);
164 columns.Push(pcUndocumentedTraitCovariate);
165 columnCount++;
166 break;
167 }
168 default :
169 error("Problem in data file (line %d):\n%s\n",
170 line, (const char *) buffer);
171 }
172 }
173
174 columns.Push(pcEnd);
175 columnHash.Push(0);
176 };
177
Load(const char * iFilename,bool warnIfLinkage)178 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
179 {
180 IFILE f = ifopen(iFilename, "rb");
181
182 if (f == NULL)
183 error(
184 "The datafile %s cannot be opened\n\n"
185 "Common causes for this problem are:\n"
186 " * You might not have used the correct options to specify input file names,\n"
187 " please check the program documentation for information on how to do this\n\n"
188 " * The file doesn't exist or the filename might have been misspelt\n\n"
189 " * The file exists but it is being used by another program which you will need\n"
190 " to close before continuing\n\n"
191 " * The file is larger than 2GB and you haven't compiled this application with\n"
192 " large file support.\n\n",
193 iFilename);
194
195 Load(f, warnIfLinkage);
196 ifclose(f);
197
198 filename = iFilename;
199 };
200
LoadMap(const char * iFilename)201 void PedigreeDescription::LoadMap(const char * iFilename)
202 {
203 IFILE f = ifopen(iFilename, "rb");
204
205 if (f == NULL)
206 error(
207 "The mapfile %s cannot be opened\n\n"
208 "Please check that the file exists and is not being used by another program\n"
209 "To find out how to set input filenames, check the documentation\n",
210 iFilename);
211
212 LoadMap(f);
213 ifclose(f);
214 };
215
LoadMap(IFILE & input)216 void PedigreeDescription::LoadMap(IFILE & input)
217 {
218 columns.Clear();
219 columnHash.Clear();
220 columnCount = 0;
221
222 int lastposition = 0;
223 String buffer;
224 StringArray tokens;
225
226 buffer.ReadLine(input);
227 tokens.AddTokens(buffer, WHITESPACE);
228
229 while (tokens.Length() == 0 && !ifeof(input))
230 {
231 buffer.ReadLine(input);
232 tokens.AddTokens(buffer, WHITESPACE);
233 }
234
235 if (tokens.Length() != 3)
236 error("Error reading map file header, which has %d columns.\n"
237 "Three columns were expected, corresponding to\n"
238 "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
239 "The offending header is transcribed below:\n\n"
240 "%s", tokens.Length(), (const char *) buffer);
241 else
242 printf("Map file column labels\n"
243 " -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
244 " -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
245 " -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
246 (const char *)(tokens[0]), (const char *)(tokens[1]),
247 (const char *)(tokens[2]));
248
249 int line = 1;
250 while (!ifeof(input))
251 {
252 int serial;
253 long position;
254
255 buffer.ReadLine(input);
256 line++;
257
258 tokens.Clear();
259 tokens.AddTokens(buffer, WHITESPACE);
260
261 if (tokens.Length() < 1) continue;
262 if (tokens.Length() != 3)
263 error("Each line in the map file should have 3 tokens, corresponding\n"
264 "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
265 "However, there are %d tokens in line %d, transcribed below:\n\n"
266 "%s", tokens.Length(), line, (const char *) buffer);
267
268 serial = (int) tokens[0];
269 if (serial != columnCount + 1)
270 error("Reading Marker Index from Map File...\n"
271 "Markers should be indexed consecutively starting at 1\n"
272 "Marker %d does not fit this pattern\n", columnCount + 1);
273
274 position = (int) tokens[2];
275 if (position < lastposition)
276 error("Reading Marker Position from Map File...\n"
277 "Marker position should be in base-pairs\n"
278 "and markers should be in map order\n");
279
280 // TODO -- store marker locations somewhere!
281 lastposition = position;
282
283 columnHash.Push(GetMarkerID(tokens[1]));
284 columns.Push(pcMarker);
285 columnCount++;
286
287 GetMarkerInfo(tokens[1])->position = position * 1e-8;
288 }
289
290 columns.Push(pcEnd);
291 columnHash.Push(0);
292 };
293
CountTextColumns()294 int PedigreeDescription::CountTextColumns()
295 {
296 int count = 0;
297
298 for (int i = 0; i < columnCount; i++, count++)
299 if (columns[i] == pcMarker)
300 count++;
301
302 return count;
303 }
304
LoadLinkageDataFile(const char * iFilename)305 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
306 {
307 IFILE f = ifopen(iFilename, "rb");
308
309 if (f == NULL)
310 error(
311 "The linkage format datafile %s cannot be opened\n\n"
312 "Please check that the file exists and is not being used by another program\n"
313 "To find out how to set input filenames, check the documentation\n",
314 iFilename);
315
316 LoadLinkageDataFile(f);
317 ifclose(f);
318
319 filename = iFilename;
320 };
321
LoadLinkageDataFile(IFILE & input)322 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
323 {
324 columns.Clear();
325 columnHash.Clear();
326 columnCount = 0;
327
328 String buffer, label;
329 StringArray tokens;
330
331 ReadLineHelper(input, buffer, tokens);
332
333 if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
334 tokens[0].AsInteger() < 0)
335 error("Cannot handle first line of data file\n\n"
336 "Expecting four (4) numeric values, which correspond to:\n"
337 " num-loci -- number of loci in the pedigree\n"
338 " this value must be positive\n"
339 " risk-locus -- locus for which risks should be calculated\n"
340 " this value will be ignored\n"
341 " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n"
342 " %s\n"
343 " program -- which LINKAGE program do you want to use?\n"
344 " this value will also be ignored\n\n"
345 "The actual input read:\n%s\n",
346 chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
347 : "expecting autosomal data, so this must be ZERO (0)",
348 (const char *) buffer);
349
350 int numloci = tokens[0];
351
352 ReadLineHelper(input, buffer, tokens);
353
354 if (tokens.Length() != 4 ||
355 tokens[0].AsInteger() != 0 ||
356 tokens[3].AsInteger() != 0)
357 error("Cannot handle second line of data file\n\n"
358 "Expecting four (4) numeric values, which correspond to:\n"
359 " mutation-model -- must be zero, corresponding to no mutation\n"
360 " male-mutation-rate -- ignored\n"
361 " female-mutation-rate -- ignored\n"
362 " linkage-disequilibrium -- must be zero, may be used in the future to\n"
363 " read haplotype frequencies\n\n"
364 "The actual input read:\n%s\n", (const char *) buffer);
365
366 StringArray markerOrder;
367 int unknown = 0;
368
369 ReadLineHelper(input, buffer, markerOrder);
370
371 if (markerOrder.Length() > numloci)
372 error("The third line of the data file lists marker order\n\n"
373 "Although %d loci are defined [in the first line],\n"
374 "this line includes %d values:\n%s\n",
375 numloci, markerOrder.Length(), (const char *) buffer);
376
377 IntArray locus;
378 bool need_blank_line = false;
379
380 while (!ifeof(input) && numloci--)
381 {
382 if (ReadLineHelper(input, buffer, tokens) == 0)
383 error("Linkage data file ends unexpectedly");
384
385 if (tokens.Length() < 2)
386 error("Incomplete locus information in data file\n"
387 "Information for each locus should include 2 or more fiels\n"
388 "The expected fields are:\n"
389 " field_type -- indicator of locus type (trait, marker,...)\n"
390 " alleles -- number of alleles\n"
391 " name -- locus name, preceded by hash (#) sign\n\n"
392 "The actual input read:\n%s\n", (const char *) buffer);
393
394 int locus_type = (int) tokens[0];
395 int alleles = (int) tokens[1];
396
397 String locus_name("LOCUS");
398 locus_name += ++unknown;
399
400 if (tokens.Length() > 2 && tokens[2][0] == '#')
401 {
402 if (tokens[2][1] != 0)
403 locus_name = tokens[2].SubStr(1);
404 else if (tokens.Length() > 3)
405 locus_name = tokens[3];
406 }
407
408 if ((locus_type == 4 && alleles == 0) ||
409 (locus_type == 4 && alleles == 1))
410 {
411 columnHash.Push(GetCovariateID(locus_name));
412 columns.Push(pcCovariate);
413 columnCount++;
414 continue;
415 }
416
417 if (locus_type == 0 && alleles == 0)
418 {
419 columnHash.Push(GetTraitID(locus_name));
420 columns.Push(pcTrait);
421 columnCount++;
422 continue;
423 }
424
425 if (ReadLineHelper(input, buffer, tokens) != alleles)
426 error("Expecting %d allele frequencies, but input has %d columns:\n"
427 "%s\n", alleles, tokens.Length(), (const char *) buffer);
428
429 Vector frequencies(alleles + 1);
430
431 frequencies[0] = 0.0;
432 for (int i = 1; i <= alleles; i++)
433 frequencies[i] = (double) tokens[i - 1];
434
435 double sum = frequencies.Sum();
436
437 if (sum <= 0.0)
438 error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
439 (const char *) locus_name, sum);
440
441 if (fabs(sum - 1.0) > 1.2e-5)
442 {
443 printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
444 (const char *) locus_name, sum);
445 need_blank_line = true;
446 }
447
448 if (sum != 1.0)
449 frequencies *= 1.0 / sum;
450
451 switch (locus_type)
452 {
453 case 1 :
454 {
455 // Affection
456 columnHash.Push(GetAffectionID(locus_name));
457 columns.Push(pcAffection);
458 columnCount++;
459
460 // Read number of liability classes
461 if (ReadLineHelper(input, buffer, tokens) == 0)
462 error("Linkage data file ends unexpectedly\n");
463
464 // Skip liability class data
465 int classes = tokens[0];
466 if (classes > 1)
467 {
468 columnHash.Push(0);
469 columns.Push(pcSkip);
470 columnCount++;
471 }
472
473 // Separate liability class rows for males and females for X-linked data
474 if (chromosomeX) classes *= 2;
475
476 while (classes--)
477 if (ReadLineHelper(input, buffer, tokens) == 0)
478 error("Linkage data file ends unexpectedly\n");
479
480 // Ignore map location for quantitative variables
481 locus.Push(-1);
482 }
483 break;
484 case 3 :
485 {
486 columnHash.Push(GetMarkerID(locus_name));
487 columns.Push(pcMarker);
488 columnCount++;
489
490 // Store allele frequencies
491 MarkerInfo * info = GetMarkerInfo(locus_name);
492
493 info->freq = frequencies;
494
495 // Initialize allele labels
496 info->alleleLabels.Clear();
497 for (int i = 0; i < frequencies.Length(); i++)
498 info->alleleLabels.Push(label = i);
499 info->IndexAlleles();
500
501 // Store marker id, so that we can track map location
502 locus.Push(GetMarkerID(locus_name));
503 }
504 break;
505 case 0 :
506 {
507 // Read number of quantitative variables
508 if (ReadLineHelper(input, buffer, tokens) == 0)
509 error("Linkage data file ends unexpectedly\n");
510
511 // Add each quantitative variable to pedigree
512 // Discard information on means
513 for (int vars = tokens[0], i = 0; i < vars; i++)
514 {
515 if (ReadLineHelper(input, buffer, tokens) == 0)
516 error("Linkage data file ends unexpectedly\n");
517
518 String trait_name(locus_name);
519
520 if (i)
521 {
522 trait_name += ".";
523 trait_name += i + 1;
524 }
525
526 columnHash.Push(GetTraitID(trait_name));
527 columns.Push(pcTrait);
528 columnCount++;
529 }
530
531 // Skip var-covar matrix
532 if (ReadLineHelper(input, buffer, tokens) == 0)
533 error("Linkage data file ends unexpectedly\n");
534
535 // Skip heterozygote scaling factor for var-covar matrix
536 if (ReadLineHelper(input, buffer, tokens) == 0)
537 error("Linkage data file ends unexpectedly\n");
538
539 // Ignore map location for quantitative variables
540 locus.Push(-1);
541 }
542 break;
543 case 2 :
544 error("The data file includes binary factors\n"
545 "Regretably, loci of this type are not supported\n\n");
546 break;
547 default :
548 error("Unsupported locus type [%d] in data file", locus_type);
549 break;
550 }
551 }
552
553 if (need_blank_line) printf("\n");
554
555 columns.Push(pcEnd);
556 columnHash.Push(0);
557
558 ReadLineHelper(input, buffer, tokens);
559 int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
560 if (tokens.Length() != 2 ||
561 (sexDifference != 0 && sexDifference != 2) ||
562 tokens[1].AsInteger() != 0)
563 error("Error retrieving recombination information\n\n"
564 "Expecting two (2) numeric values, which correspond to:\n"
565 " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n"
566 " map-function -- must be zero, that is, no interference\n"
567 "The actual input read:\n%s\n", (const char *) buffer);
568
569 Vector distances[2];
570 bool distance_in_centimorgans = false;
571
572 for (int r = 0; r <= sexDifference; r += 2)
573 {
574 ReadLineHelper(input, buffer, tokens);
575 if (tokens.Length() != markerOrder.Length() - 1)
576 error("Error retrieving recombination information\n\n"
577 "Expecting %d recombination fractions (current map includes %d loci)\n"
578 "Instead the following line was input:\n%s\n",
579 markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
580
581 distances[r >> 1].Dimension(tokens.Length());
582 for (int i = 0; i < tokens.Length(); i++)
583 distances[r >> 1][i] = (double) tokens[i];
584
585 if (distances[r >> 1].Min() < 0.0)
586 error("Linkage datafile specifies negative recombination fractions");
587
588 bool centimorgans = distances[r >> 1].Max() > 0.5;
589
590 if (centimorgans && !distance_in_centimorgans)
591 printf(" Some recombination fractions in datafile are greater than 0.5,\n"
592 " so recombination fractions will be interpreted as cM distances\n\n");
593
594 distance_in_centimorgans |= centimorgans;
595 }
596
597 double position = 0.0, positionMale = 0.0;
598
599 for (int i = 0, moving = false; i < markerOrder.Length(); i++)
600 {
601 int m = markerOrder[i].AsInteger() - 1;
602
603 if (m < 0 || m >= locus.Length())
604 error("The marker order in the linkage datafile is invalid\n");
605
606 m = locus[m];
607
608 if (m != -1)
609 {
610 MarkerInfo * info = GetMarkerInfo(m);
611 info->chromosome = chromosomeX ? 9999 : 0;
612
613 if (sexDifference == 2)
614 info->position = (position + positionMale) * 0.5,
615 info->positionFemale = position,
616 info->positionMale = positionMale;
617 else
618 info->position = info->positionMale = info->positionFemale = position;
619
620 moving = true;
621 }
622
623 if (i < markerOrder.Length() - 1 && moving)
624 position += distance_in_centimorgans ?
625 0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
626
627 if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
628 positionMale += distance_in_centimorgans ?
629 0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
630 }
631 }
632
ReadLineHelper(IFILE & input,String & buffer,StringArray & tokens)633 int PedigreeDescription::ReadLineHelper(IFILE & input,
634 String & buffer,
635 StringArray & tokens)
636 {
637 do
638 {
639 // Read Line
640 buffer.ReadLine(input);
641 buffer.Trim();
642
643 // Strip comments marked with >>
644 int pos = buffer.FastFind(">>");
645 if (pos == -1) pos = buffer.FastFind("<<");
646 if (pos == -1) pos = buffer.Length() + 1;
647 if (buffer[0] == '#') pos = 0;
648
649 // Find space/tab delimited tokens
650 tokens.Clear();
651 tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
652
653 }
654 while (tokens.Length() == 0 && !ifeof(input));
655
656 return tokens.Length();
657 }
658
LoadMendelDataFile(const char * iFilename)659 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
660 {
661 IFILE f = ifopen(iFilename, "rb");
662
663 if (f == NULL)
664 error(
665 "The MENDEL format datafile %s cannot be opened\n\n"
666 "Please check that the file exists and is not being used by another program\n"
667 "To find out how to set input filenames, check the documentation\n",
668 iFilename);
669
670 LoadMendelDataFile(f);
671 ifclose(f);
672 };
673
LoadMendelDataFile(IFILE & file)674 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
675 {
676 // Processes mendel format file
677 mendelFormat = true;
678
679 // Codominant markers are mapped to markers
680 // Non-codominant markers are mapped into multiple "affection status"
681 // (Y/N) variables
682 columns.Clear();
683 columnHash.Clear();
684 columnCount = 0;
685
686 FortranFormat parser;
687
688 // Variables for storing parsed input
689 String locusName;
690 String locusType;
691 String alleleLabel;
692 String alleleFreq;
693 String phenotype;
694 String genotype;
695 int phenoCount;
696 int alleleCount;
697
698 while (!ifeof(file))
699 {
700 // Cycle through headers for each locus
701 parser.SetInputFile(file);
702 parser.SetFormat("(2A8,I2,I3)");
703
704 // After retrieving locus name, check that we haven't tried to
705 // read past the end-of-file
706 parser.GetNextField(locusName);
707 parser.GetNextField(locusType);
708 alleleCount = parser.GetNextInteger();
709 phenoCount = parser.GetNextInteger();
710
711 if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
712 phenoCount == 0 && ifeof(file))
713 break;
714
715 // Only recognize autosomal and x-linked loci
716 if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
717 error("Unrecognized locus type '%s' in Mendel data file\n\n"
718 "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
719 (const char *) locusType);
720
721 if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
722 error("The data file indicates that locus %s is AUTOSOMAL, but\n"
723 "X-LINKED loci were expected as input\n",
724 (const char *) locusName);
725
726 if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
727 error("The data file indicates that locus %s is X-LINKED, but\n"
728 "AUTOSOMAL loci were expected as input\n",
729 (const char *) locusName);
730
731 if (locusName.IsEmpty())
732 error("Blank locus name encountered in data file\n");
733
734 if (phenoCount == 0)
735 {
736 // Co-dominant marker
737 columns.Push(pcMarker);
738 columnHash.Push(GetMarkerID(locusName));
739 columnCount++;
740
741 // Update marker info with allele labels and frequencies
742 MarkerInfo * info = GetMarkerInfo(locusName);
743
744 info->alleleLabels.Clear();
745 info->alleleLabels.Push("");
746 info->freq.Clear();
747
748 parser.SetFormat("(2A8)");
749
750 // Mendel allows allele names to be specified with frequencies
751 // left blank
752 for (int i = 0; i < alleleCount; i++)
753 {
754 parser.GetNextField(alleleLabel);
755 parser.GetNextField(alleleFreq);
756
757 if (alleleLabel.IsEmpty())
758 error("Locus %s is missing allele label for allele #%d\n",
759 (const char *) locusName, i+1);
760
761 info->alleleLabels.Push(alleleLabel);
762
763 if (!alleleFreq.IsEmpty())
764 {
765 if (info->freq.Length() == 0)
766 info->freq.Push(0.0);
767
768 info->freq.Push(alleleFreq.AsDouble());
769 }
770 }
771 info->IndexAlleles();
772
773 if (info->alleleLabels.Length() != info->freq.Length() &&
774 info->freq.Length() != 0)
775 error("Locus %s is missing allele frequency information for %d alleles\n",
776 (const char *) locusName,
777 info->alleleLabels.Length() - info->freq.Length());
778 }
779 else
780 {
781 // Non-codominant marker, which we decompose into multiple traits...
782 parser.SetFormat("(2A8)");
783
784 // First skip allele frequency information
785 for (int i = 0; i < alleleCount; i++)
786 {
787 parser.GetNextField(alleleLabel);
788 parser.GetNextField(alleleFreq);
789 }
790
791 // Then read in each phenotype
792 for (int i = 0; i < alleleCount; i++)
793 {
794 parser.SetFormat("(A8,I3)");
795 parser.GetNextField(phenotype);
796 int genoCount = parser.GetNextInteger();
797
798 parser.SetFormat("(A17)");
799 for (int j = 0; j < genoCount; j++)
800 parser.GetNextField(genotype);
801
802 columns.Push(pcAffection);
803 columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
804 columnCount++;
805 }
806 }
807 }
808
809 columns.Push(pcEnd);
810 columnHash.Push(0);
811 }
812
CountColumns(int type)813 int PedigreeDescription::CountColumns(int type)
814 {
815 int count = 0;
816
817 for (int i = 0; i < columns.Length(); i++)
818 if (columns[i] == type)
819 count++;
820
821 return count;
822 }
823
ColumnSummary(String & string)824 const char * PedigreeDescription::ColumnSummary(String & string)
825 {
826 string.Clear();
827 UpdateSummary(string, pcMarker, " markers [x2 cols]");
828 UpdateSummary(string, pcTrait, " traits");
829 UpdateSummary(string, pcAffection, " discrete traits");
830 UpdateSummary(string, pcCovariate, " covariates");
831 UpdateSummary(string, pcString, " strings");
832 UpdateSummary(string, pcZygosity, " zygosity");
833 UpdateSummary(string, pcSkip, " skipped");
834 return string;
835 }
836
UpdateSummary(String & string,int type,const char * label)837 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
838 {
839 int count = CountColumns(type);
840
841 if (count)
842 {
843 if (string.Length())
844 string += ", ";
845 string += count;
846 string += label;
847 }
848 }
849
850
AddMarkerColumn(const char * markerName)851 void PedigreeDescription::AddMarkerColumn(const char * markerName)
852 {
853 if (columns.Last() == pcEnd)
854 {
855 columns.Pop();
856 columnHash.Pop();
857 }
858
859 columnHash.Push(GetMarkerID(markerName));
860 columns.Push(pcMarker);
861 columnCount++;
862 }
863
AddCovariateColumn(const char * covariateName)864 void PedigreeDescription::AddCovariateColumn(const char * covariateName)
865 {
866 if (columns.Last() == pcEnd)
867 {
868 columns.Pop();
869 columnHash.Pop();
870 }
871
872 columnHash.Push(GetCovariateID(covariateName));
873 columns.Push(pcCovariate);
874 columnCount++;
875 }
876
AddTraitColumn(const char * traitName)877 void PedigreeDescription::AddTraitColumn(const char * traitName)
878 {
879 if (columns.Last() == pcEnd)
880 {
881 columns.Pop();
882 columnHash.Pop();
883 }
884
885 columnHash.Push(GetCovariateID(traitName));
886 columns.Push(pcTrait);
887 columnCount++;
888 }
889
AddAffectionColumn(const char * affectionName)890 void PedigreeDescription::AddAffectionColumn(const char * affectionName)
891 {
892 if (columns.Last() == pcEnd)
893 {
894 columns.Pop();
895 columnHash.Pop();
896 }
897
898 columnHash.Push(GetAffectionID(affectionName));
899 columns.Push(pcAffection);
900 columnCount++;
901 }
902
AddStringColumn(const char * stringName)903 void PedigreeDescription::AddStringColumn(const char * stringName)
904 {
905 if (columns.Last() == pcEnd)
906 {
907 columns.Pop();
908 columnHash.Pop();
909 }
910
911 columnHash.Push(GetStringID(stringName));
912 columns.Push(pcString);
913 columnCount++;
914 }
915
AddZygosityColumn()916 void PedigreeDescription::AddZygosityColumn()
917 {
918 if (columns.Last() == pcEnd)
919 {
920 columns.Pop();
921 columnHash.Pop();
922 }
923
924 columnHash.Push(0);
925 columns.Push(pcZygosity);
926 columnCount++;
927 }
928
AddSkippedColumn()929 void PedigreeDescription::AddSkippedColumn()
930 {
931 if (columns.Last() == pcEnd)
932 {
933 columns.Pop();
934 columnHash.Pop();
935 }
936
937 columnHash.Push(0);
938 columns.Push(pcSkip);
939 columnCount++;
940 }
941
942