1 /*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "GenotypeLists.h"
19
20 // When the next line is uncommented, the genotype elimination routines
21 // produce a lot of output useful for debugging
22 // #define DEBUG_ELIMINATOR
23
GenotypeList()24 GenotypeList::GenotypeList()
25 {
26 ignore = false;
27 }
28
EliminateGenotypes(Pedigree & ped,Family * family,int marker)29 bool GenotypeList::EliminateGenotypes(Pedigree & ped, Family * family, int marker)
30 {
31 // First, allocate a genotype list for the family
32 GenotypeList * list = new GenotypeList [family->count];
33
34 // Next, update the possible allele lists for each individual
35 InitializeList(list, ped, family, marker);
36
37 // Then, do multiple rounds of elimination until a problem is found
38 // or no changes are made
39
40 #ifdef DEBUG_ELIMINATOR
41 Print(list, ped, family, marker);
42 #endif
43
44 while (PairwiseCheck(list, ped, family) || FamilyCheck(list, ped, family))
45 #ifdef DEBUG_ELIMINATOR
46 Print(list, ped, family, marker)
47 #endif
48 ;
49
50 for (int i = 0; i < family->count; i++)
51 if (!list[i].ignore && list[i].allele1.Length() == 0)
52 {
53 printf("%s - Family %s has a subtle genotype inconsistency\n",
54 (const char *) ped.markerNames[marker], (const char *) family->famid);
55
56 delete [] list;
57 return false;
58 }
59
60 delete [] list;
61 return true;
62 }
63
InitializeList(GenotypeList * list,Pedigree & ped,Family * family,int marker)64 void GenotypeList::InitializeList(GenotypeList * list, Pedigree & ped, Family * family, int marker)
65 {
66 for (int i = family->count - 1; i >= 0; i--)
67 {
68 Person & person = ped[family->path[i]];
69 int id = person.traverse;
70 bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
71
72 #ifdef DEBUG_ELIMINATOR
73 printf("Initializing genotype list for %s ...\n", (const char *) person.pid);
74 #endif
75
76 // If an individual is genotyped ...
77 if (person.markers[marker].isKnown())
78 {
79 // Their genotype list starts with just one entry!
80 list[id].Dimension(1);
81 list[id].SetGenotype(0, person.markers[marker][0], person.markers[marker][1]);
82 list[id].alleles.Clear();
83 list[id].alleles.Push(person.markers[marker][0]);
84 list[id].alleles.PushIfNew(person.markers[marker][1]);
85 list[id].ignore = false;
86
87 // "Heterozygous" males have no possible genotypes
88 if (maleX && person.markers[marker].isHeterozygous())
89 list[id].Dimension(0);
90 }
91 else if (list[id].alleles.Length())
92 if (person.sex == SEX_MALE && ped.chromosomeX)
93 {
94 // Males only carry one X chromosome
95 list[id].Dimension(list[id].alleles.Length() + 1);
96
97 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
98 list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[i]);
99 list[id].SetGenotype(list[id].alleles.Length(), -1, -1);
100
101 list[id].ignore = false;
102 }
103 else
104 {
105 // Build the genotype list based on the available allele lists
106 int count = list[id].alleles.Length() * (list[id].alleles.Length() + 3) / 2 + 1;
107
108 list[id].Dimension(count);
109
110 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
111 {
112 // Allow for all pairs of "transmitted" alleles
113 for (int j = 0; j <= i; j++)
114 list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[j]);
115
116 // Allow for an unstransmitted allele
117 list[id].SetGenotype(out++, list[id].alleles[i], -1);
118 }
119
120 // Allow for a pair of untransmitted alleles
121 list[id].SetGenotype(count - 1, -1, -1);
122
123 list[id].ignore = false;
124 }
125 else
126 list[id].ignore = true;
127
128 // If the individual is a founder this is all there is to it
129 if (i < family->founders) continue;
130
131 // If the individual is not a founder, update the parental genotype lists...
132 int fatid = person.father->traverse;
133 int motid = person.mother->traverse;
134
135 for (int i = 0; i < list[id].alleles.Length(); i++)
136 {
137 list[motid].alleles.PushIfNew(list[id].alleles[i]);
138 if (!maleX) list[fatid].alleles.PushIfNew(list[id].alleles[i]);
139 }
140 }
141 }
142
PairwiseCheck(GenotypeList * list,Pedigree & ped,Family * family)143 bool GenotypeList::PairwiseCheck(GenotypeList * list, Pedigree & ped, Family * family)
144 {
145 #ifdef DEBUG_ELIMINATOR
146 printf("Checking Relative Pairs ...\n");
147 #endif
148
149 bool changed = false;
150
151 for (int i = family->count - 1; i >= family->founders; i--)
152 {
153 Person & person = ped[family->path[i]];
154
155 int id = person.traverse;
156 int fatid = person.father->traverse;
157 int motid = person.mother->traverse;
158
159 bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
160
161 if (list[id].ignore) continue;
162
163 // Check if genotypes are consistent with paternal genotypes
164 for (int i = 0; i < list[id].allele1.Length(); i++)
165 {
166 int al1 = list[id].allele1[i];
167 int al2 = list[id].allele2[i];
168
169 // Remove offspring genotypes incompatible with parental genotypes
170 if ((maleX && !list[motid].Matches(al1) && al1 != -1) ||
171 (!maleX && !(al1 == -1 && al2 == -1) &&
172 !(list[fatid].Matches(al1) && (al2 == -1 || list[motid].Matches(al2))) &&
173 !((al2 == -1 || list[fatid].Matches(al2)) && list[motid].Matches(al1))))
174 {
175 list[id].Delete(i--);
176 changed = true;
177 }
178 }
179
180 // The offspring genotype list allows for a wild-card untransmitted allele
181 // so any single parental genotype is possible
182 if (list[id].Matches(-1))
183 continue;
184
185 // Check if genotypes are consistent with offspring genotypes
186 for (int i = 0; i < list[motid].allele1.Length(); i++)
187 {
188 int al1 = list[motid].allele1[i];
189 int al2 = list[motid].allele2[i];
190
191 // Remove genotypes incompatible with offspring genotypes
192 if (!list[id].Matches(al1) &&
193 !list[id].Matches(al2))
194 {
195 list[motid].Delete(i--);
196 changed = true;
197 }
198 }
199
200 // Males don't affect genotype lists for their fathers
201 if (maleX) continue;
202
203 // Check if genotypes are consistent with offspring genotypes
204 for (int i = 0; i < list[fatid].allele1.Length(); i++)
205 {
206 int al1 = list[fatid].allele1[i];
207 int al2 = list[fatid].allele2[i];
208
209 // Remove genotypes incompatible with offspring genotypes
210 if (!list[id].Matches(al1) &&
211 !list[id].Matches(al2))
212 {
213 list[fatid].Delete(i--);
214 changed = true;
215 }
216 }
217
218 #ifdef DEBUG_ELIMINATOR
219 printf("Done checking individual %s\n", (const char *) person.pid);
220 Print(list, ped, family, 0);
221 #endif
222 }
223
224 return changed;
225 }
226
227
FamilyCheck(GenotypeList * list,Pedigree & ped,Family * family)228 bool GenotypeList::FamilyCheck(GenotypeList * list, Pedigree & ped, Family * family)
229 {
230 #ifdef DEBUG_ELIMINATOR
231 printf("Checking Nuclear Families ...\n");
232 #endif
233
234 bool changed = false;
235
236 for (int i = family->count - 1; i >= family->founders; i--)
237 {
238 Person & person = ped[family->path[i]];
239
240 int fatid = person.father->traverse;
241 int motid = person.mother->traverse;
242
243 // Only go through the loop once per sibship
244 if (person.sibs[0] != &person || list[fatid].ignore || list[motid].ignore)
245 continue;
246
247 #ifdef DEBUG_ELIMINATOR
248 printf("Checking Sibship with %s ...\n", (const char *) person.pid);
249 #endif
250
251 // Reset checked genotypes for the mother, father and child
252 list[fatid].checked = 0;
253 list[motid].checked = 0;
254
255 for (int i = 0; i < person.sibCount; i++)
256 list[person.sibs[i]->traverse].checked = 0;
257
258 // Go through each of the paternal genotypes
259 changed |= TrimParent(list, person, fatid, motid);
260
261 // Go through each of maternal genotypes
262 changed |= TrimParent(list, person, motid, fatid);
263
264 // Sort out the unchecked offspring genotypes ...
265 for (int i = 0; i < person.sibCount; i++)
266 {
267 int sibid = person.sibs[i]->traverse;
268 bool maleX = person.sibs[i]->sex == SEX_MALE && ped.chromosomeX;
269
270 // For dealing with male X chromosomes, the pairwise check is all we need
271 if (maleX) continue;
272
273 for (int j = list[sibid].checked; j < list[sibid].allele1.Length(); j++)
274 changed |= Cleanup(list, person, motid, fatid, sibid, j);
275 }
276
277 #ifdef DEBUG_ELIMINATOR
278 // Print(list, ped, family, 0);
279 #endif
280 }
281
282 return changed;
283 }
284
Matches(int genotype,int allele)285 bool GenotypeList::Matches(int genotype, int allele)
286 {
287 return allele1[genotype] == allele || allele2[genotype] == allele;
288 }
289
Matches(int allele)290 bool GenotypeList::Matches(int allele)
291 {
292 return allele1.Find(allele) != -1 || allele2.Find(allele) != -1;
293 }
294
SaveGenotype(int genotype)295 int GenotypeList::SaveGenotype(int genotype)
296 {
297 if (checked > genotype)
298 return genotype;
299
300 if (checked != genotype)
301 {
302 allele1.Swap(genotype, checked);
303 allele2.Swap(genotype, checked);
304 }
305
306 return checked++;
307 }
308
CheckTrio(GenotypeList * list,int fatid,int motid,int child,int i,int j,int k)309 bool GenotypeList::CheckTrio(GenotypeList * list, int fatid, int motid, int child,
310 int i, int j, int k)
311 {
312 // TODO: add tests for this code...
313 return (list[fatid].Matches(i, list[child].allele1[k]) &&
314 (list[motid].Matches(j, list[child].allele2[k]) || list[child].allele2[k] == -1)) ||
315 ((list[fatid].Matches(i, list[child].allele2[k]) || list[child].allele2[k] == -1) &&
316 list[motid].Matches(j, list[child].allele1[k])) ||
317 (list[child].allele1[k] == -1 && list[child].allele2[k] == -1);
318 }
319
Dimension(int genotypes)320 void GenotypeList::Dimension(int genotypes)
321 {
322 allele1.Dimension(genotypes);
323 allele2.Dimension(genotypes);
324 }
325
SetGenotype(int genotype,int al1,int al2)326 void GenotypeList::SetGenotype(int genotype, int al1, int al2)
327 {
328 allele1[genotype] = al1;
329 allele2[genotype] = al2;
330 }
331
Delete(int genotype)332 void GenotypeList::Delete(int genotype)
333 {
334 allele1.Delete(genotype);
335 allele2.Delete(genotype);
336 }
337
TrimParent(GenotypeList * list,Person & person,int motid,int fatid)338 bool GenotypeList::TrimParent(GenotypeList * list, Person & person, int motid, int fatid)
339 {
340 bool trimmed = false;
341
342 while (list[motid].checked < list[motid].allele1.Length())
343 {
344 int current = list[motid].allele1.Length() - 1;
345 bool saved = false;
346
347 // Pair it with each possible paternal genotype
348 for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
349 {
350 int matches = 0;
351
352 // Find out if the pairing is compatible with at least one genotype for each child
353 for (int j = 0; j < person.sibCount; j++)
354 {
355 int sibid = person.sibs[j]->traverse;
356 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
357
358 // Since we have done the pairwise check, there is nothing more
359 // to do for males ...
360 if (list[sibid].ignore || maleX)
361 {
362 matches++;
363 continue;
364 }
365
366 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
367 if (CheckTrio(list, motid, fatid, sibid, current, i, k))
368 {
369 matches++;
370 break;
371 }
372
373 if (matches != j + 1)
374 break;
375 }
376
377 // Save maternal and paternal genotypes, mark all compatible sibling genotypes
378 if (matches == person.sibCount)
379 {
380 for (int j = 0; j < person.sibCount; j++)
381 {
382 int sibid = person.sibs[j]->traverse;
383
384 for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
385 if (CheckTrio(list, motid, fatid, sibid, current, i, k))
386 list[sibid].SaveGenotype(k);
387 }
388
389 list[motid].SaveGenotype(current);
390 list[fatid].SaveGenotype(i);
391
392 saved = true;
393
394 break;
395 }
396 }
397
398 if (!saved)
399 {
400 list[motid].Delete(current);
401 trimmed = true;
402 }
403 }
404
405 return trimmed;
406 }
407
Cleanup(GenotypeList * list,Person & person,int motid,int fatid,int child,int geno)408 bool GenotypeList::Cleanup(GenotypeList * list, Person & person, int motid, int fatid, int child, int geno)
409 {
410 for (int current = 0; current < list[motid].allele1.Length(); current++)
411 for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
412 if (CheckTrio(list, motid, fatid, child, current, i, geno))
413 {
414 int matches = 0;
415
416 // Find out if the pairing is compatible with at least one genotype for each child
417 for (int j = 0; j < person.sibCount; j++)
418 {
419 int sibid = person.sibs[j]->traverse;
420 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
421
422 // After completing the pairwise check, all males are guaranteed
423 // to be compatible with their mothers
424 if (list[sibid].ignore || maleX)
425 {
426 matches++;
427 continue;
428 }
429
430 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
431 if (CheckTrio(list, motid, fatid, sibid, current, i, k))
432 {
433 matches++;
434 break;
435 }
436
437 if (matches != j + 1)
438 break;
439 }
440
441 // Update list of compatible sibling genotypes
442 if (matches == person.sibCount)
443 for (int j = 0; j < person.sibCount; j++)
444 {
445 int sibid = person.sibs[j]->traverse;
446
447 for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
448 if (CheckTrio(list, motid, fatid, sibid, current, i, k))
449 list[sibid].SaveGenotype(k);
450
451 return false;
452 }
453 }
454
455 list[child].Delete(geno);
456
457 return true;
458 }
459
Print(GenotypeList * list,Pedigree & ped,Family * family,int marker)460 void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker)
461 {
462 MarkerInfo * info = ped.GetMarkerInfo(marker);
463
464 for (int i = 0; i < family->count; i++)
465 {
466 printf("%s - ", (const char *) ped[family->path[i]].pid);
467
468 for (int j = 0; j < list[i].allele1.Length(); j++)
469 {
470 if (list[i].allele1[j] == -1)
471 printf("*/");
472 else
473 printf("%s/", (const char *) info->GetAlleleLabel(list[i].allele1[j]));
474
475 if (list[i].allele2[j] == -1)
476 printf("* ");
477 else
478 printf("%s ", (const char *) info->GetAlleleLabel(list[i].allele2[j]));
479 }
480
481 printf("\n");
482 }
483 printf("\n");
484 }
485
486