1 /*
2  *  Copyright (C) 2012  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __VCF_SUBSET_SAMPLES_H__
19 #define __VCF_SUBSET_SAMPLES_H__
20 
21 #include <vector>
22 #include <set>
23 #include <string>
24 #include "VcfHeader.h"
25 
26 class VcfSubsetSamples
27 {
28 public:
VcfSubsetSamples()29     VcfSubsetSamples()
30         : mySampleSubsetIndicator(),
31           mySampleNames()
32     {}
33 
~VcfSubsetSamples()34     ~VcfSubsetSamples()
35     {
36         mySampleSubsetIndicator.clear();
37     }
38 
39     void reset();
40 
41     /// Read the samples from the header initiallizing them all to be
42     /// included/excluded based on the include paramater.  The header is
43     /// not stored or updated based on any include/excludes.  The mapping
44     /// between sampleNames & indexes is stored to be used for addIncludeSample
45     /// and addExcludeSample.
46     void init(const VcfHeader& header, bool include);
47 
48     /// Include the specified sample.  initSample must first be called to
49     /// specify the header mapping between index and sample.
50     /// \return true if the sample could be included, false if the sample
51     /// was not found in the sample list so cannot be included.
52     bool addIncludeSample(const char* sampleName);
53 
54     /// Exclude the specified sample.  initSample must first be called to
55     /// specify the header mapping between index and sample.
56     /// \return true if the sample was found in teh sample list and could be
57     /// excluded, false if the sample was not found in the sample list.
58     bool addExcludeSample(const char* sampleName);
59 
60     /// Initialize this object based on the sample names found in sampleFileName
61     /// delimited by any of the characters in delims or '\n' and update the
62     /// header to only include the specified samples.
63     /// This also initializes this class to identify which samples should
64     /// be kept/removed when reading records.
65     bool init(VcfHeader& header, const char* sampleFileName,
66               const char* excludeSample, const char* excludeFileName,
67               const char* delims = "\n");
68 
69     /// Return if the specified original sample index should be kept.
70     /// This is only applicable after calling init.
71     /// If the index is out of range, it will return false (do not keep).
72     /// \param sampleIndex index into the original sample set to check if
73     /// it should be kept.
74     /// \return true if the sample index should be kept, false if not or if
75     /// the index is out of range.
76     bool keep(unsigned int sampleIndex);
77 
78 private:
79     VcfSubsetSamples(const VcfSubsetSamples& vcfSubsetSamples);
80     VcfSubsetSamples& operator=(const VcfSubsetSamples& vcfSubsetSamples);
81 
82     // Read a list of samples from the specified file delimited by any of the
83     // characters in delims or '\n' and store them in the specified container.
84     bool readSamplesFromFile(const char* fileName,
85                              std::set<std::string>& sampleList,
86                              const char* delims="\n");
87 
88 
89     std::vector<bool> mySampleSubsetIndicator;
90 
91     // Used for initSample & addIncludeSample & addExcludeSample for
92     // mapping between original sample names and indexes in
93     // mySampleSubsetIndicator.
94     std::vector<std::string>mySampleNames;
95 };
96 
97 #endif
98