1 /* 2 * Copyright (C) 2012 Regents of the University of Michigan 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #ifndef __VCF_SUBSET_SAMPLES_H__ 19 #define __VCF_SUBSET_SAMPLES_H__ 20 21 #include <vector> 22 #include <set> 23 #include <string> 24 #include "VcfHeader.h" 25 26 class VcfSubsetSamples 27 { 28 public: VcfSubsetSamples()29 VcfSubsetSamples() 30 : mySampleSubsetIndicator(), 31 mySampleNames() 32 {} 33 ~VcfSubsetSamples()34 ~VcfSubsetSamples() 35 { 36 mySampleSubsetIndicator.clear(); 37 } 38 39 void reset(); 40 41 /// Read the samples from the header initiallizing them all to be 42 /// included/excluded based on the include paramater. The header is 43 /// not stored or updated based on any include/excludes. The mapping 44 /// between sampleNames & indexes is stored to be used for addIncludeSample 45 /// and addExcludeSample. 46 void init(const VcfHeader& header, bool include); 47 48 /// Include the specified sample. initSample must first be called to 49 /// specify the header mapping between index and sample. 50 /// \return true if the sample could be included, false if the sample 51 /// was not found in the sample list so cannot be included. 52 bool addIncludeSample(const char* sampleName); 53 54 /// Exclude the specified sample. initSample must first be called to 55 /// specify the header mapping between index and sample. 56 /// \return true if the sample was found in teh sample list and could be 57 /// excluded, false if the sample was not found in the sample list. 58 bool addExcludeSample(const char* sampleName); 59 60 /// Initialize this object based on the sample names found in sampleFileName 61 /// delimited by any of the characters in delims or '\n' and update the 62 /// header to only include the specified samples. 63 /// This also initializes this class to identify which samples should 64 /// be kept/removed when reading records. 65 bool init(VcfHeader& header, const char* sampleFileName, 66 const char* excludeSample, const char* excludeFileName, 67 const char* delims = "\n"); 68 69 /// Return if the specified original sample index should be kept. 70 /// This is only applicable after calling init. 71 /// If the index is out of range, it will return false (do not keep). 72 /// \param sampleIndex index into the original sample set to check if 73 /// it should be kept. 74 /// \return true if the sample index should be kept, false if not or if 75 /// the index is out of range. 76 bool keep(unsigned int sampleIndex); 77 78 private: 79 VcfSubsetSamples(const VcfSubsetSamples& vcfSubsetSamples); 80 VcfSubsetSamples& operator=(const VcfSubsetSamples& vcfSubsetSamples); 81 82 // Read a list of samples from the specified file delimited by any of the 83 // characters in delims or '\n' and store them in the specified container. 84 bool readSamplesFromFile(const char* fileName, 85 std::set<std::string>& sampleList, 86 const char* delims="\n"); 87 88 89 std::vector<bool> mySampleSubsetIndicator; 90 91 // Used for initSample & addIncludeSample & addExcludeSample for 92 // mapping between original sample names and indexes in 93 // mySampleSubsetIndicator. 94 std::vector<std::string>mySampleNames; 95 }; 96 97 #endif 98