1 #ifndef METAPOPINFO_H
2 #define METAPOPINFO_H
3 
4 #include <string>
5 #include <vector>
6 #include <map>
7 
8 #include "constants.h"
9 
// A sample of the metapopulation.
//
// Standard-library names are spelled with an explicit std:: prefix so that
// this header does not depend on a using-directive leaking in from another
// include.
struct Sample {
    std::string name; // Sample name; secondary sort key (see operator<).
    size_t pop;       // Population index; primary sort key. SIZE_MAX until assigned.
    int id;           // optional; -1 until an ID is set.

    // Builds a sample named `n`, with no population (SIZE_MAX) and no ID (-1).
    // Intentionally left implicit so that existing conversions from a
    // string keep compiling.
    Sample(const std::string& n) : name(n), pop(SIZE_MAX), id(-1) {}

    // Strict ordering on (pop, name); defined out of class.
    inline bool operator<(const Sample& other) const;
};
18 
// A population: a named, inclusive range [first_sample, last_sample] of
// sample indexes, plus the index of the group it belongs to.
// (Presumably these index MetaPopInfo's sample and group vectors -- confirm
// against the implementation file.)
struct Pop {
    std::string name;
    size_t first_sample; // Inclusive lower bound; SIZE_MAX while unset.
    size_t last_sample;  // Inclusive upper bound; SIZE_MAX while unset.
    size_t group;        // SIZE_MAX while unset.

    // Builds a population named `n` with no samples and no group assigned.
    Pop(const std::string& n) : name(n), first_sample(SIZE_MAX), last_sample(SIZE_MAX), group(SIZE_MAX) {}

    // Number of samples in the inclusive range [first_sample, last_sample].
    // Returns 0 when the range is unset or inverted; the raw arithmetic
    // `last - first + 1` would otherwise yield 1 for a freshly built Pop
    // (SIZE_MAX - SIZE_MAX + 1) or wrap around for an inverted range.
    size_t n_samples() const {
        const bool unset = (first_sample == SIZE_MAX || last_sample == SIZE_MAX);
        if (unset || last_sample < first_sample)
            return 0;
        return last_sample - first_sample + 1;
    }

    // Defined in the implementation file.
    static const std::string default_name;
};
29 
// A named group of populations.
//
// Standard-library names are spelled with an explicit std:: prefix so that
// this header does not depend on a using-directive leaking in from another
// include.
struct Group {
    std::string name;
    std::vector<size_t> pops; // Indexes of the member populations; empty at construction.

    // Builds an empty group named `n`.
    Group(const std::string& n) : name(n), pops() {}

    // Defined in the implementation file.
    static const std::string default_name;
};
37 
/*
 * MetaPopInfo
 * Class for representing a metapopulation: its samples, populations,
 * groups of populations, and associated information.
 */
43 class MetaPopInfo {
44     vector<Sample> samples_; //n.b. Samples are sorted primarily by population index, and secondarily by name.
45     vector<Pop> pops_;
46     vector<Group> groups_;
47 
48     map<string,size_t> sample_indexes_; // Links a name with an index in [samples_].
49     map<string,size_t> pop_indexes_;
50     map<string,size_t> group_indexes_;
51     void reset_sample_map(); // Resets [sample_indexes_].
52     void reset_pop_map();
53     void reset_group_map();
54 
55     vector<string> orig_sample_order_; // Safeguards the original input sample order.
56     vector<size_t> sample_indexes_orig_order_;
57     void reset_orig_order();
58 
59     map<size_t,size_t> sample_indexes_by_id_; // Links a sample ID with an index in [samples_].
60     void reset_sample_id_map();
61 
62     MetaPopInfo(MetaPopInfo&& other) = delete; // Immovable (for pointer stability).
63 public:
64     MetaPopInfo() = default;
65 
66     // Create the representation :
67     // -- from a population map file.
68     // -- from just a vector of sample names.
69     // -- or by looking for "*.tags.tsv(.gz)" files in a directory.
70     void init_popmap(const string& popmap_path);
71     void init_names(const vector<string>& sample_names);
72     void init_directory(const string& dir_path);
73 
74     // Delete samples from the metapopulation.
75     // (As samples, populations or groups may be deleted, the indexes of
76     // the remaining ones change, but the order in which they appear
77     // is preserved.)
78     void delete_samples(const vector<size_t>& rm_samples);
79 
80     // Intersects the population map with a list of samples.
81     // May call `delete_samples()`.
82     void intersect_with(const vector<string>& samples);
83 
84     // Retrieve information.
samples()85     const vector<Sample>& samples() const {return samples_;}
n_samples()86     size_t n_samples() const {return samples().size();}
pops()87     const vector<Pop>& pops() const {return pops_;}
groups()88     const vector<Group>& groups() const {return groups_;}
89 
sample_indexes_orig_order()90     const vector<size_t>& sample_indexes_orig_order() const {return sample_indexes_orig_order_;}
91 
92     size_t get_sample_index(const string& name, bool must_exist=true) const;
get_pop_index(const string & name)93     size_t get_pop_index(const string& name) const {return pop_indexes_.at(name);}
get_group_index(const string & name)94     size_t get_group_index(const string& name) const {return group_indexes_.at(name);}
95 
96     // Work with sample IDs. (IDs unicity is not enforced.)
set_sample_id(size_t index,size_t id)97     void set_sample_id(size_t index, size_t id) {samples_.at(index).id = id; sample_indexes_by_id_[id] = index;}
get_sample_index(const size_t & id)98     size_t get_sample_index(const size_t& id) const {return sample_indexes_by_id_.at(id);}
99 
100     void status(ostream &fh);
101 
102     /*
103      * Methods for backwards compatibility
104      */
105 
106     // Fill former globals.
107     void fill_files(vector<pair<int, string> >&) const;
108     void fill_sample_ids(vector<int>&) const;
109     void fill_samples(map<int, string>&) const;
110     void fill_pop_key(map<int, string>&) const;
111     void fill_pop_indexes(map<int, pair<int, int> >&) const;
112     void fill_grp_key(map<int, string>&) const;
113     void fill_grp_members(map<int, vector<int> >&) const;
114 };
115 
116 inline
117 bool Sample::operator<(const Sample& other) const {
118     if (pop == other.pop)
119         return name < other.name;
120     else
121         return pop < other.pop;
122 }
123 
124 inline
get_sample_index(const string & name,bool must_exist)125 size_t MetaPopInfo::get_sample_index(
126     const string& name,
127     bool must_exist
128 ) const {
129     if (must_exist) {
130         return sample_indexes_.at(name);
131     } else {
132         auto itr = sample_indexes_.find(name);
133         return (itr == sample_indexes_.end() ? SIZE_MAX : itr->second);
134     }
135 }
136 
137 #endif // METAPOPINFO_H
138