1 /*
2  *  Copyright (C) 2010  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "GenotypeLists.h"
19 
20 // When the next line is uncommented, the genotype elimination routines
21 // produce a lot of output useful for debugging
22 // #define DEBUG_ELIMINATOR
23 
GenotypeList()24 GenotypeList::GenotypeList()
25 {
26     ignore = false;
27 }
28 
EliminateGenotypes(Pedigree & ped,Family * family,int marker)29 bool GenotypeList::EliminateGenotypes(Pedigree & ped, Family * family, int marker)
30 {
31     // First, allocate a genotype list for the family
32     GenotypeList * list = new GenotypeList [family->count];
33 
34     // Next, update the possible allele lists for each individual
35     InitializeList(list, ped, family, marker);
36 
37     // Then, do multiple rounds of elimination until a problem is found
38     // or no changes are made
39 
40 #ifdef DEBUG_ELIMINATOR
41     Print(list, ped, family, marker);
42 #endif
43 
44     while (PairwiseCheck(list, ped, family) || FamilyCheck(list, ped, family))
45 #ifdef DEBUG_ELIMINATOR
46         Print(list, ped, family, marker)
47 #endif
48         ;
49 
50     for (int i = 0; i < family->count; i++)
51         if (!list[i].ignore && list[i].allele1.Length() == 0)
52         {
53             printf("%s - Family %s has a subtle genotype inconsistency\n",
54                    (const char *) ped.markerNames[marker], (const char *) family->famid);
55 
56             delete [] list;
57             return false;
58         }
59 
60     delete [] list;
61     return true;
62 }
63 
InitializeList(GenotypeList * list,Pedigree & ped,Family * family,int marker)64 void GenotypeList::InitializeList(GenotypeList * list, Pedigree & ped, Family * family, int marker)
65 {
66     for (int i = family->count - 1; i >= 0; i--)
67     {
68         Person & person = ped[family->path[i]];
69         int id = person.traverse;
70         bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
71 
72 #ifdef DEBUG_ELIMINATOR
73         printf("Initializing genotype list for %s ...\n", (const char *) person.pid);
74 #endif
75 
76         // If an individual is genotyped ...
77         if (person.markers[marker].isKnown())
78         {
79             // Their genotype list starts with just one entry!
80             list[id].Dimension(1);
81             list[id].SetGenotype(0, person.markers[marker][0], person.markers[marker][1]);
82             list[id].alleles.Clear();
83             list[id].alleles.Push(person.markers[marker][0]);
84             list[id].alleles.PushIfNew(person.markers[marker][1]);
85             list[id].ignore = false;
86 
87             // "Heterozygous" males have no possible genotypes
88             if (maleX && person.markers[marker].isHeterozygous())
89                 list[id].Dimension(0);
90         }
91         else if (list[id].alleles.Length())
92             if (person.sex == SEX_MALE && ped.chromosomeX)
93             {
94                 // Males only carry one X chromosome
95                 list[id].Dimension(list[id].alleles.Length() + 1);
96 
97                 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
98                     list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[i]);
99                 list[id].SetGenotype(list[id].alleles.Length(), -1, -1);
100 
101                 list[id].ignore = false;
102             }
103             else
104             {
105                 // Build the genotype list based on the available allele lists
106                 int count = list[id].alleles.Length() * (list[id].alleles.Length() + 3) / 2 + 1;
107 
108                 list[id].Dimension(count);
109 
110                 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
111                 {
112                     // Allow for all pairs of "transmitted" alleles
113                     for (int j = 0; j <= i; j++)
114                         list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[j]);
115 
116                     // Allow for an unstransmitted allele
117                     list[id].SetGenotype(out++, list[id].alleles[i], -1);
118                 }
119 
120                 // Allow for a pair of untransmitted alleles
121                 list[id].SetGenotype(count - 1, -1, -1);
122 
123                 list[id].ignore = false;
124             }
125         else
126             list[id].ignore = true;
127 
128         // If the individual is a founder this is all there is to it
129         if (i < family->founders) continue;
130 
131         // If the individual is not a founder, update the parental genotype lists...
132         int fatid = person.father->traverse;
133         int motid = person.mother->traverse;
134 
135         for (int i = 0; i < list[id].alleles.Length(); i++)
136         {
137             list[motid].alleles.PushIfNew(list[id].alleles[i]);
138             if (!maleX) list[fatid].alleles.PushIfNew(list[id].alleles[i]);
139         }
140     }
141 }
142 
PairwiseCheck(GenotypeList * list,Pedigree & ped,Family * family)143 bool GenotypeList::PairwiseCheck(GenotypeList * list, Pedigree & ped, Family * family)
144 {
145 #ifdef DEBUG_ELIMINATOR
146     printf("Checking Relative Pairs ...\n");
147 #endif
148 
149     bool changed = false;
150 
151     for (int i = family->count - 1; i >= family->founders; i--)
152     {
153         Person & person = ped[family->path[i]];
154 
155         int id = person.traverse;
156         int fatid = person.father->traverse;
157         int motid = person.mother->traverse;
158 
159         bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
160 
161         if (list[id].ignore) continue;
162 
163         // Check if genotypes are consistent with paternal genotypes
164         for (int i = 0; i < list[id].allele1.Length(); i++)
165         {
166             int al1 = list[id].allele1[i];
167             int al2 = list[id].allele2[i];
168 
169             // Remove offspring genotypes incompatible with parental genotypes
170             if ((maleX && !list[motid].Matches(al1) && al1 != -1) ||
171                     (!maleX && !(al1 == -1 && al2 == -1) &&
172                      !(list[fatid].Matches(al1) && (al2 == -1 || list[motid].Matches(al2))) &&
173                      !((al2 == -1 || list[fatid].Matches(al2)) && list[motid].Matches(al1))))
174             {
175                 list[id].Delete(i--);
176                 changed = true;
177             }
178         }
179 
180         // The offspring genotype list allows for a wild-card untransmitted allele
181         // so any single parental genotype is possible
182         if (list[id].Matches(-1))
183             continue;
184 
185         // Check if genotypes are consistent with offspring genotypes
186         for (int i = 0; i < list[motid].allele1.Length(); i++)
187         {
188             int al1 = list[motid].allele1[i];
189             int al2 = list[motid].allele2[i];
190 
191             // Remove genotypes incompatible with offspring genotypes
192             if (!list[id].Matches(al1) &&
193                     !list[id].Matches(al2))
194             {
195                 list[motid].Delete(i--);
196                 changed = true;
197             }
198         }
199 
200         // Males don't affect genotype lists for their fathers
201         if (maleX) continue;
202 
203         // Check if genotypes are consistent with offspring genotypes
204         for (int i = 0; i < list[fatid].allele1.Length(); i++)
205         {
206             int al1 = list[fatid].allele1[i];
207             int al2 = list[fatid].allele2[i];
208 
209             // Remove genotypes incompatible with offspring genotypes
210             if (!list[id].Matches(al1) &&
211                     !list[id].Matches(al2))
212             {
213                 list[fatid].Delete(i--);
214                 changed = true;
215             }
216         }
217 
218 #ifdef DEBUG_ELIMINATOR
219         printf("Done checking individual %s\n", (const char *) person.pid);
220         Print(list, ped, family, 0);
221 #endif
222     }
223 
224     return changed;
225 }
226 
227 
FamilyCheck(GenotypeList * list,Pedigree & ped,Family * family)228 bool GenotypeList::FamilyCheck(GenotypeList * list, Pedigree & ped, Family * family)
229 {
230 #ifdef DEBUG_ELIMINATOR
231     printf("Checking Nuclear Families ...\n");
232 #endif
233 
234     bool changed = false;
235 
236     for (int i = family->count - 1; i >= family->founders; i--)
237     {
238         Person & person = ped[family->path[i]];
239 
240         int fatid = person.father->traverse;
241         int motid = person.mother->traverse;
242 
243         // Only go through the loop once per sibship
244         if (person.sibs[0] != &person || list[fatid].ignore || list[motid].ignore)
245             continue;
246 
247 #ifdef DEBUG_ELIMINATOR
248         printf("Checking Sibship with %s ...\n", (const char *) person.pid);
249 #endif
250 
251         // Reset checked genotypes for the mother, father and child
252         list[fatid].checked = 0;
253         list[motid].checked = 0;
254 
255         for (int i = 0; i < person.sibCount; i++)
256             list[person.sibs[i]->traverse].checked = 0;
257 
258         // Go through each of the paternal genotypes
259         changed |= TrimParent(list, person, fatid, motid);
260 
261         // Go through each of maternal genotypes
262         changed |= TrimParent(list, person, motid, fatid);
263 
264         // Sort out the unchecked offspring genotypes ...
265         for (int i = 0; i < person.sibCount; i++)
266         {
267             int sibid = person.sibs[i]->traverse;
268             bool maleX = person.sibs[i]->sex == SEX_MALE && ped.chromosomeX;
269 
270             // For dealing with male X chromosomes, the pairwise check is all we need
271             if (maleX) continue;
272 
273             for (int j = list[sibid].checked; j < list[sibid].allele1.Length(); j++)
274                 changed |= Cleanup(list, person, motid, fatid, sibid, j);
275         }
276 
277 #ifdef DEBUG_ELIMINATOR
278 //      Print(list, ped, family, 0);
279 #endif
280     }
281 
282     return changed;
283 }
284 
Matches(int genotype,int allele)285 bool GenotypeList::Matches(int genotype, int allele)
286 {
287     return allele1[genotype] == allele || allele2[genotype] == allele;
288 }
289 
Matches(int allele)290 bool GenotypeList::Matches(int allele)
291 {
292     return allele1.Find(allele) != -1 || allele2.Find(allele) != -1;
293 }
294 
SaveGenotype(int genotype)295 int GenotypeList::SaveGenotype(int genotype)
296 {
297     if (checked > genotype)
298         return genotype;
299 
300     if (checked != genotype)
301     {
302         allele1.Swap(genotype, checked);
303         allele2.Swap(genotype, checked);
304     }
305 
306     return checked++;
307 }
308 
CheckTrio(GenotypeList * list,int fatid,int motid,int child,int i,int j,int k)309 bool GenotypeList::CheckTrio(GenotypeList * list, int fatid, int motid, int child,
310                              int i, int j, int k)
311 {
312     // TODO: add tests for this code...
313     return   (list[fatid].Matches(i, list[child].allele1[k]) &&
314              (list[motid].Matches(j, list[child].allele2[k]) || list[child].allele2[k] == -1)) ||
315              ((list[fatid].Matches(i, list[child].allele2[k]) || list[child].allele2[k] == -1) &&
316              list[motid].Matches(j, list[child].allele1[k])) ||
317              (list[child].allele1[k] == -1 && list[child].allele2[k] == -1);
318 }
319 
Dimension(int genotypes)320 void GenotypeList::Dimension(int genotypes)
321 {
322     allele1.Dimension(genotypes);
323     allele2.Dimension(genotypes);
324 }
325 
SetGenotype(int genotype,int al1,int al2)326 void GenotypeList::SetGenotype(int genotype, int al1, int al2)
327 {
328     allele1[genotype] = al1;
329     allele2[genotype] = al2;
330 }
331 
Delete(int genotype)332 void GenotypeList::Delete(int genotype)
333 {
334     allele1.Delete(genotype);
335     allele2.Delete(genotype);
336 }
337 
TrimParent(GenotypeList * list,Person & person,int motid,int fatid)338 bool GenotypeList::TrimParent(GenotypeList * list, Person & person, int motid, int fatid)
339 {
340     bool trimmed = false;
341 
342     while (list[motid].checked < list[motid].allele1.Length())
343     {
344         int  current = list[motid].allele1.Length() - 1;
345         bool saved = false;
346 
347         // Pair it with each possible paternal genotype
348         for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
349         {
350             int matches = 0;
351 
352             // Find out if the pairing is compatible with at least one genotype for each child
353             for (int j = 0; j < person.sibCount; j++)
354             {
355                 int sibid = person.sibs[j]->traverse;
356                 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
357 
358                 // Since we have done the pairwise check, there is nothing more
359                 // to do for males ...
360                 if (list[sibid].ignore || maleX)
361                 {
362                     matches++;
363                     continue;
364                 }
365 
366                 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
367                     if (CheckTrio(list, motid, fatid, sibid, current, i, k))
368                     {
369                         matches++;
370                         break;
371                     }
372 
373                 if (matches != j + 1)
374                     break;
375             }
376 
377             // Save maternal and paternal genotypes, mark all compatible sibling genotypes
378             if (matches == person.sibCount)
379             {
380                 for (int j = 0; j < person.sibCount; j++)
381                 {
382                     int sibid = person.sibs[j]->traverse;
383 
384                     for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
385                         if (CheckTrio(list, motid, fatid, sibid, current, i, k))
386                             list[sibid].SaveGenotype(k);
387                 }
388 
389                 list[motid].SaveGenotype(current);
390                 list[fatid].SaveGenotype(i);
391 
392                 saved = true;
393 
394                 break;
395             }
396         }
397 
398         if (!saved)
399         {
400             list[motid].Delete(current);
401             trimmed = true;
402         }
403     }
404 
405     return trimmed;
406 }
407 
Cleanup(GenotypeList * list,Person & person,int motid,int fatid,int child,int geno)408 bool GenotypeList::Cleanup(GenotypeList * list, Person & person, int motid, int fatid, int child, int geno)
409 {
410     for (int current = 0; current < list[motid].allele1.Length(); current++)
411         for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
412             if (CheckTrio(list, motid, fatid, child, current, i, geno))
413             {
414                 int matches = 0;
415 
416                 // Find out if the pairing is compatible with at least one genotype for each child
417                 for (int j = 0; j < person.sibCount; j++)
418                 {
419                     int sibid = person.sibs[j]->traverse;
420                     int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
421 
422                     // After completing the pairwise check, all males are guaranteed
423                     // to be compatible with their mothers
424                     if (list[sibid].ignore || maleX)
425                     {
426                         matches++;
427                         continue;
428                     }
429 
430                     for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
431                         if (CheckTrio(list, motid, fatid, sibid, current, i, k))
432                         {
433                             matches++;
434                             break;
435                         }
436 
437                     if (matches != j + 1)
438                         break;
439                 }
440 
441                 // Update list of compatible sibling genotypes
442                 if (matches == person.sibCount)
443                     for (int j = 0; j < person.sibCount; j++)
444                     {
445                         int sibid = person.sibs[j]->traverse;
446 
447                         for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
448                             if (CheckTrio(list, motid, fatid, sibid, current, i, k))
449                                 list[sibid].SaveGenotype(k);
450 
451                         return false;
452                     }
453             }
454 
455     list[child].Delete(geno);
456 
457     return true;
458 }
459 
Print(GenotypeList * list,Pedigree & ped,Family * family,int marker)460 void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker)
461 {
462     MarkerInfo * info = ped.GetMarkerInfo(marker);
463 
464     for (int i = 0; i < family->count; i++)
465     {
466         printf("%s - ", (const char *) ped[family->path[i]].pid);
467 
468         for (int j = 0; j < list[i].allele1.Length(); j++)
469         {
470             if (list[i].allele1[j] == -1)
471                 printf("*/");
472             else
473                 printf("%s/", (const char *) info->GetAlleleLabel(list[i].allele1[j]));
474 
475             if (list[i].allele2[j] == -1)
476                 printf("* ");
477             else
478                 printf("%s ", (const char *) info->GetAlleleLabel(list[i].allele2[j]));
479         }
480 
481         printf("\n");
482     }
483     printf("\n");
484 }
485 
486