1 #ifndef METAPOPINFO_H
2 #define METAPOPINFO_H
3
4 #include <string>
5 #include <vector>
6 #include <map>
7
8 #include "constants.h"
9
/*
 * Sample
 * One sample, identified by its name, together with the index of the
 * population it belongs to and an optional numeric ID.
 */
struct Sample {
    string name;
    size_t pop = SIZE_MAX; // Population index; SIZE_MAX until one is assigned.
    int    id  = -1;       // Optional; -1 until one is assigned.

    // Builds a sample named [n]; [pop] and [id] keep their sentinel values.
    Sample(const string& n) : name(n) {}

    // Orders samples by [pop] first, then by [name].
    inline bool operator<(const Sample& other) const;
};
18
/*
 * Pop
 * A named population described by an inclusive range of sample indexes
 * [first_sample, last_sample] and the index of the group it belongs to.
 * (Presumably these index MetaPopInfo's sample and group vectors -- confirm
 * against the implementation file.)
 */
struct Pop {
    string name;
    size_t first_sample; // SIZE_MAX until assigned.
    size_t last_sample;  // SIZE_MAX until assigned.
    size_t group;        // SIZE_MAX until assigned.

    // Builds a population named [n] whose range and group are not set yet.
    Pop(const string& n) : name(n), first_sample(SIZE_MAX), last_sample(SIZE_MAX), group(SIZE_MAX) {}

    // Number of samples in [first_sample, last_sample].
    // Returns 0 when either bound is still SIZE_MAX (range never set) or when
    // the range is inverted; the bare `last - first + 1` arithmetic would
    // otherwise report 1 sample for a freshly constructed population and a
    // wrapped-around huge count for some inverted ranges.
    size_t n_samples() const {
        if (first_sample == SIZE_MAX || last_sample == SIZE_MAX || first_sample > last_sample)
            return 0;
        return last_sample - first_sample + 1;
    }

    static const string default_name;
};
29
/*
 * Group
 * A named group of populations, stored as a list of population indexes.
 */
struct Group {
    string name;
    vector<size_t> pops;

    // Builds a group named [n]; [pops] starts out empty.
    Group(const string& n) : name(n) {}

    static const string default_name;
};
37
/*
 * MetaPopInfo
 * Class for representing a metapopulation: its samples, populations,
 * groups of populations, and associated information.
 */
43 class MetaPopInfo {
44 vector<Sample> samples_; //n.b. Samples are sorted primarily by population index, and secondarily by name.
45 vector<Pop> pops_;
46 vector<Group> groups_;
47
48 map<string,size_t> sample_indexes_; // Links a name with an index in [samples_].
49 map<string,size_t> pop_indexes_;
50 map<string,size_t> group_indexes_;
51 void reset_sample_map(); // Resets [sample_indexes_].
52 void reset_pop_map();
53 void reset_group_map();
54
55 vector<string> orig_sample_order_; // Safeguards the original input sample order.
56 vector<size_t> sample_indexes_orig_order_;
57 void reset_orig_order();
58
59 map<size_t,size_t> sample_indexes_by_id_; // Links a sample ID with an index in [samples_].
60 void reset_sample_id_map();
61
62 MetaPopInfo(MetaPopInfo&& other) = delete; // Immovable (for pointer stability).
63 public:
64 MetaPopInfo() = default;
65
66 // Create the representation :
67 // -- from a population map file.
68 // -- from just a vector of sample names.
69 // -- or by looking for "*.tags.tsv(.gz)" files in a directory.
70 void init_popmap(const string& popmap_path);
71 void init_names(const vector<string>& sample_names);
72 void init_directory(const string& dir_path);
73
74 // Delete samples from the metapopulation.
75 // (As samples, populations or groups may be deleted, the indexes of
76 // the remaining ones change, but the order in which they appear
77 // is preserved.)
78 void delete_samples(const vector<size_t>& rm_samples);
79
80 // Intersects the population map with a list of samples.
81 // May call `delete_samples()`.
82 void intersect_with(const vector<string>& samples);
83
84 // Retrieve information.
samples()85 const vector<Sample>& samples() const {return samples_;}
n_samples()86 size_t n_samples() const {return samples().size();}
pops()87 const vector<Pop>& pops() const {return pops_;}
groups()88 const vector<Group>& groups() const {return groups_;}
89
sample_indexes_orig_order()90 const vector<size_t>& sample_indexes_orig_order() const {return sample_indexes_orig_order_;}
91
92 size_t get_sample_index(const string& name, bool must_exist=true) const;
get_pop_index(const string & name)93 size_t get_pop_index(const string& name) const {return pop_indexes_.at(name);}
get_group_index(const string & name)94 size_t get_group_index(const string& name) const {return group_indexes_.at(name);}
95
96 // Work with sample IDs. (IDs unicity is not enforced.)
set_sample_id(size_t index,size_t id)97 void set_sample_id(size_t index, size_t id) {samples_.at(index).id = id; sample_indexes_by_id_[id] = index;}
get_sample_index(const size_t & id)98 size_t get_sample_index(const size_t& id) const {return sample_indexes_by_id_.at(id);}
99
100 void status(ostream &fh);
101
102 /*
103 * Methods for backwards compatibility
104 */
105
106 // Fill former globals.
107 void fill_files(vector<pair<int, string> >&) const;
108 void fill_sample_ids(vector<int>&) const;
109 void fill_samples(map<int, string>&) const;
110 void fill_pop_key(map<int, string>&) const;
111 void fill_pop_indexes(map<int, pair<int, int> >&) const;
112 void fill_grp_key(map<int, string>&) const;
113 void fill_grp_members(map<int, vector<int> >&) const;
114 };
115
116 inline
117 bool Sample::operator<(const Sample& other) const {
118 if (pop == other.pop)
119 return name < other.name;
120 else
121 return pop < other.pop;
122 }
123
124 inline
get_sample_index(const string & name,bool must_exist)125 size_t MetaPopInfo::get_sample_index(
126 const string& name,
127 bool must_exist
128 ) const {
129 if (must_exist) {
130 return sample_indexes_.at(name);
131 } else {
132 auto itr = sample_indexes_.find(name);
133 return (itr == sample_indexes_.end() ? SIZE_MAX : itr->second);
134 }
135 }
136
137 #endif // METAPOPINFO_H
138