1 // File Description 2 /// \file CompositeBamReader.h 3 /// \brief Defines the composite BAM readers, for working with multiple input 4 /// files. 5 // 6 // Author: Derek Barnett 7 8 #ifndef COMPOSITEBAMREADER_H 9 #define COMPOSITEBAMREADER_H 10 11 #include <deque> 12 #include <functional> 13 #include <memory> 14 #include <string> 15 #include <vector> 16 #include "pbbam/BaiIndexedBamReader.h" 17 #include "pbbam/BamFile.h" 18 #include "pbbam/BamHeader.h" 19 #include "pbbam/BamReader.h" 20 #include "pbbam/BamRecord.h" 21 #include "pbbam/Config.h" 22 #include "pbbam/DataSet.h" 23 #include "pbbam/GenomicInterval.h" 24 #include "pbbam/PbiIndexedBamReader.h" 25 26 namespace PacBio { 27 namespace BAM { 28 29 namespace internal { 30 31 /// \internal 32 /// \brief The CompositeMergeItem class provides a helper struct for composite 33 /// readers, containing a single-file reader and its "next" record. 34 /// 35 struct CompositeMergeItem 36 { 37 public: 38 std::unique_ptr<BamReader> reader; 39 BamRecord record; 40 41 public: 42 CompositeMergeItem(std::unique_ptr<BamReader> rdr); 43 CompositeMergeItem(std::unique_ptr<BamReader> rdr, BamRecord rec); 44 CompositeMergeItem(CompositeMergeItem&&) = default; 45 CompositeMergeItem& operator=(CompositeMergeItem&&) = default; 46 ~CompositeMergeItem() = default; 47 }; 48 49 /// \internal 50 /// \brief The CompositeMergeItemSorter class provides a helper function object 51 /// for ordering composite reader results. 52 /// 53 /// Essentially just exracts a BamRecord from its parent CompositeMergeItem for 54 /// further checks. 55 /// 56 template <typename CompareType> 57 struct CompositeMergeItemSorter 58 : public std::function<bool(const CompositeMergeItem&, const CompositeMergeItem&)> 59 { 60 bool operator()(const CompositeMergeItem& lhs, const CompositeMergeItem& rhs); 61 }; 62 63 } // namespace internal 64 65 /// \brief The GenomicIntervalCompositeBamReader class provides read access to 66 /// multipe %BAM files, limiting results to a genomic region. 67 /// 68 /// Requires a ".bai" file for each input %BAM file. 69 /// 70 /// Results will be returned in order of genomic coordinate (first by reference 71 /// ID, then by position). 72 /// 73 class PBBAM_EXPORT GenomicIntervalCompositeBamReader 74 { 75 public: 76 /// \name Contstructors & Related Methods 77 /// \{ 78 79 GenomicIntervalCompositeBamReader(const GenomicInterval& interval, 80 const std::vector<BamFile>& bamFiles); 81 GenomicIntervalCompositeBamReader(const GenomicInterval& interval, const DataSet& dataset); 82 83 /// \} 84 85 public: 86 /// \name Data Access 87 /// \{ 88 89 /// Fetches next BAM record in the interval specified, storing in \p record 90 /// 91 /// \param[out] record 92 /// \returns true on success, false if no more data available. 93 /// 94 bool GetNext(BamRecord& record); 95 96 /// Sets a new genomic interval of interest. 97 /// 98 /// \returns reference to this reader 99 /// 100 GenomicIntervalCompositeBamReader& Interval(const GenomicInterval& interval); 101 102 /// \returns the current specified interval 103 /// 104 const GenomicInterval& Interval() const; 105 106 /// \} 107 108 private: 109 void UpdateSort(); 110 111 private: 112 GenomicInterval interval_; 113 std::deque<internal::CompositeMergeItem> mergeItems_; 114 std::vector<std::string> filenames_; 115 }; 116 117 /// \brief Provides read access to multipe %BAM files, limiting results to those 118 /// passing a PbiFilter. 119 /// 120 /// Requires a ".pbi" file for each input %BAM file. 121 /// 122 /// \note The template parameter OrderByType is not fully implemented at this 123 /// time. Use of comparison functor (e.g. Compare::Zmw) for this will 124 /// currently result in the proper "next" value <b> at each iteration 125 /// step, independently, but not over the full data set. </b> If all 126 /// files' "order-by" data values are accessible in increasing order 127 /// within each file, then the expected ordering will be observed, 128 /// However, if these data are not sorted within a file, the final results 129 /// will appear unordered. \n 130 /// \n 131 /// Example:\n 132 /// file 1: { 1, 5, 2, 6 } \n 133 /// file 2: { 3, 8, 4, 7 } \n 134 /// results: { 1, 3, 5, 2, 6, 8, 4, 7 } \n 135 /// \n 136 /// This a known issue and will be addressed in a future update. But in 137 /// the meantime, use of Compare::None as the OrderByType is recommended, 138 /// to explicitly indicate that no particular ordering is expected. 139 /// 140 template <typename OrderByType> 141 class PBBAM_EXPORT PbiFilterCompositeBamReader 142 { 143 public: 144 using value_type = internal::CompositeMergeItem; 145 using merge_sorter_type = internal::CompositeMergeItemSorter<OrderByType>; 146 using container_type = std::deque<value_type>; 147 using iterator = typename container_type::iterator; 148 using const_iterator = typename container_type::const_iterator; 149 150 public: 151 /// \name Contstructors & Related Methods 152 /// \{ 153 154 PbiFilterCompositeBamReader(const PbiFilter& filter, const std::vector<BamFile>& bamFiles); 155 PbiFilterCompositeBamReader(const PbiFilter& filter, const DataSet& dataset); 156 157 /// \} 158 159 public: 160 /// \name Data Access 161 /// \{ 162 163 /// Fetches next BAM record in the interval specified. 164 /// 165 /// \returns true on success, false if no more data available. 166 /// 167 bool GetNext(BamRecord& record); 168 169 /// Sets a new PBI filter 170 /// 171 /// \returns reference to this reader 172 /// 173 PbiFilterCompositeBamReader& Filter(const PbiFilter& filter); 174 175 uint32_t NumReads() const; 176 177 /// \} 178 179 private: 180 void UpdateSort(); 181 182 private: 183 container_type mergeQueue_; 184 std::vector<std::string> filenames_; 185 uint32_t numReads_; 186 }; 187 188 /// \brief The SequentialCompositeBamReader class provides read access to 189 /// multiple %BAM files, reading through the entire contents of each 190 /// file. 191 /// 192 /// Input files will be accessed in the order provided to the constructor. Each 193 /// file's contents will be exhausted before moving on to the next one (as 194 /// opposed to a "round-robin" scheme). 195 /// 196 class PBBAM_EXPORT SequentialCompositeBamReader 197 { 198 public: 199 /// \name Contstructors & Related Methods 200 /// \{ 201 202 SequentialCompositeBamReader(std::vector<BamFile> bamFiles); 203 SequentialCompositeBamReader(const DataSet& dataset); 204 205 /// \} 206 207 public: 208 /// \name Data Access 209 /// \{ 210 211 /// Fetches next BAM record from the . 212 /// 213 /// \returns true on success, false if no more data available. 214 /// 215 bool GetNext(BamRecord& record); 216 217 /// \} 218 219 private: 220 std::deque<std::unique_ptr<BamReader> > readers_; 221 }; 222 223 } // namespace BAM 224 } // namespace PacBio 225 226 #include "pbbam/internal/CompositeBamReader.inl" 227 228 #endif // COMPOSITEBAMREADER_H 229