1 // File Description 2 /// \file BamHeader.h 3 /// \brief Defines the BamHeader class. 4 // 5 // Author: Derek Barnett 6 7 #ifndef BAMHEADER_H 8 #define BAMHEADER_H 9 10 #include <cstddef> 11 #include <cstdint> 12 #include <memory> 13 #include <stdexcept> 14 #include <string> 15 #include <vector> 16 17 #include "pbbam/Config.h" 18 #include "pbbam/ProgramInfo.h" 19 #include "pbbam/ReadGroupInfo.h" 20 #include "pbbam/SequenceInfo.h" 21 22 namespace PacBio { 23 namespace BAM { 24 25 namespace internal { 26 class BamHeaderPrivate; 27 } 28 29 /// \brief The BamHeader class represents the header section of the %BAM file. 30 /// 31 /// It provides metadata about the file including file version, reference 32 /// sequences, read groups, comments, etc. 33 /// 34 /// A BamHeader may be fetched from a BamFile to view an existing file's 35 /// metadata. Or one may be created/edited for use with writing to a new file 36 /// (via BamWriter). 37 /// 38 /// \note A particular BamHeader is likely to be re-used in lots of places 39 /// throughout the library, for read-only purposes. For this reason, even 40 /// though a BamHeader may be returned by value, it is essentially a thin 41 /// wrapper for a shared-pointer to the actual data. This means, though, 42 /// that if you need to edit an existing BamHeader for use with a 43 /// BamWriter, please consider using BamHeader::DeepCopy. Otherwise any 44 /// modifications will affect all BamHeaders that are sharing its 45 /// underlying data. 46 /// 47 class PBBAM_EXPORT BamHeader 48 { 49 public: 50 /// \name Constructors & Related Methods 51 /// \{ 52 53 /// 54 /// \brief Creates a BamHeader from SAM-formatted text 55 /// \param samHeaderText 56 /// 57 BamHeader(const std::string& samHeaderText); 58 59 BamHeader(); 60 BamHeader(const BamHeader&) = default; 61 BamHeader(BamHeader&&) = default; 62 BamHeader& operator=(const BamHeader&) = default; 63 BamHeader& operator=(BamHeader&&) = default; 64 ~BamHeader() = default; 65 66 /// \brief Detaches underlying data from the shared-pointer, returning a 67 /// independent copy of the header contents. 68 /// 69 /// This ensures that any modifications to the newly returned BamHeader do 70 /// not affect other BamHeader objects that were sharing its underlying data. 71 /// 72 BamHeader DeepCopy() const; 73 74 /// \} 75 76 public: 77 /// \name Operators 78 /// \{ 79 80 /// \brief Merges another header with this one. 81 /// 82 /// Headers must be compatible for merging. This means that their Version, 83 /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data, 84 /// Sequences) must all match. If not, an exception will be thrown. 85 /// 86 /// \param[in] other header to merge with this one 87 /// \returns reference to this header 88 /// 89 /// \throws std::runtime_error if the headers are not compatible 90 /// 91 BamHeader& operator+=(const BamHeader& other); 92 93 /// \brief Creates a new, merged header. 94 /// 95 /// Headers must be compatible for merging. This means that their Version, 96 /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data, 97 /// Sequences) must all match. If not, an exception will be thrown. 98 /// 99 /// Both original headers (this header and \p other) will not be modified. 100 /// 101 /// \param[in] other header to merge with this one 102 /// \returns merged header 103 /// 104 /// \throws std::runtime_error if the headers are not compatible 105 /// 106 BamHeader operator+(const BamHeader& other) const; 107 108 /// \} 109 110 public: 111 /// \name General Attributes 112 /// \{ 113 114 /// \returns the %PacBio %BAM version number (\@HD:pb) 115 /// 116 /// \note This is different from the SAM/BAM version number 117 /// \sa BamHeader::Version. 118 /// 119 std::string PacBioBamVersion() const; 120 121 /// \returns the sort order used 122 /// 123 /// Valid values: "unknown", "unsorted", "queryname", or "coordinate" 124 /// 125 std::string SortOrder() const; 126 127 /// \returns the SAM/BAM version number (\@HD:VN) 128 /// 129 /// \note This is different from the %PacBio %BAM version number 130 /// \sa BamHeader::PacBioBamVersion 131 /// 132 std::string Version() const; 133 134 /// \} 135 136 public: 137 /// \name Read Groups 138 /// \{ 139 140 /// \returns true if the header contains a read group with \p id (\@RG:ID) 141 bool HasReadGroup(const std::string& id) const; 142 143 /// \returns a ReadGroupInfo object representing the read group matching 144 /// \p id (\@RG:ID) 145 /// \throws std::runtime_error if \p id is unknown 146 /// 147 ReadGroupInfo ReadGroup(const std::string& id) const; 148 149 /// \returns vector of read group IDs listed in this header 150 std::vector<std::string> ReadGroupIds() const; 151 152 /// \returns vector of ReadGroupInfo objects, representing all read groups 153 /// listed in this header 154 /// 155 std::vector<ReadGroupInfo> ReadGroups() const; 156 157 /// \} 158 159 public: 160 /// \name Sequences 161 /// \{ 162 163 /// \returns true if header contains a sequence with \p name (\@SQ:SN) 164 bool HasSequence(const std::string& name) const; 165 166 /// \returns number of sequences (\@SQ entries) stored in this header 167 size_t NumSequences() const; 168 169 /// \returns numeric ID for sequence matching \p name (\@SQ:SN) 170 /// 171 /// This is the numeric ID used elsewhere throughout the API. 172 /// 173 /// \throws std::runtime_error if \p name is unknown 174 /// \sa BamReader::ReferenceId, PbiReferenceIdFilter, 175 /// PbiRawMappedData::tId_ 176 /// 177 int32_t SequenceId(const std::string& name) const; 178 179 /// \returns the length of the sequence (\@SQ:LN, e.g. chromosome length) at 180 /// index \p id 181 /// 182 /// \sa SequenceInfo::Length, BamHeader::SequenceId 183 /// 184 std::string SequenceLength(const int32_t id) const; 185 186 /// \returns the name of the sequence (\@SQ:SN) at index \p id 187 /// 188 /// \sa SequenceInfo::Name, BamHeader::SequenceId 189 /// 190 std::string SequenceName(const int32_t id) const; 191 192 /// \returns vector of sequence names (\@SQ:SN) stored in this header 193 /// 194 /// Position in the vector is equivalent to SequenceId. 195 /// 196 std::vector<std::string> SequenceNames() const; 197 198 /// \returns SequenceInfo object at index \p id 199 /// 200 /// \throws std::out_of_range if \p is an invalid or unknown index 201 /// \sa BamHeader::SequenceId 202 /// 203 SequenceInfo Sequence(const int32_t id) const; 204 205 /// \returns SequenceInfo for the sequence matching \p name 206 SequenceInfo Sequence(const std::string& name) const; 207 208 /// \returns vector of SequenceInfo objects representing the sequences 209 /// (\@SQ entries) stored in this header 210 /// 211 std::vector<SequenceInfo> Sequences() const; 212 213 /// \} 214 215 public: 216 /// \name Programs 217 /// \{ 218 219 /// \returns true if this header contains a program entry with ID (\@PG:ID) 220 /// matching \p id 221 /// 222 bool HasProgram(const std::string& id) const; 223 224 /// \returns ProgramInfo object for the program entry matching \p id 225 /// \throws std::runtime_error if \p id is unknown 226 /// 227 ProgramInfo Program(const std::string& id) const; 228 229 /// \returns vector of program IDs (\@PG:ID) 230 std::vector<std::string> ProgramIds() const; 231 232 /// \returns vector of ProgramInfo objects representing program entries 233 /// (\@PG) stored in this heder 234 /// 235 std::vector<ProgramInfo> Programs() const; 236 237 /// \} 238 239 public: 240 /// \name Comments 241 /// \{ 242 243 /// \returns vector of comment (\@CO) strings 244 std::vector<std::string> Comments() const; 245 246 /// \} 247 248 public: 249 /// \name Conversion Methods 250 /// \{ 251 252 /// \returns SAM-header-formatted string representing this header's data 253 std::string ToSam() const; 254 255 /// \} 256 257 public: 258 /// \name General Attributes 259 /// \{ 260 261 /// \brief Sets this header's PacBioBAM version number (\@HD:pb). 262 /// 263 /// \returns reference to this object 264 /// \throws std::runtime_error if version number cannot be parsed or 265 /// is less than the minimum version allowed. 266 /// 267 BamHeader& PacBioBamVersion(const std::string& version); 268 269 /// \brief Sets this header's sort order label (\@HD:SO). 270 /// 271 /// Valid values: "unknown", "unsorted", "queryname", or "coordinate" 272 /// 273 /// \returns reference to this object 274 /// 275 BamHeader& SortOrder(std::string order); 276 277 /// \brief Sets this header's SAM/BAM version number (\@HD:VN). 278 /// 279 /// \returns reference to this object 280 /// 281 BamHeader& Version(std::string version); 282 283 /// \} 284 285 public: 286 /// \name Read Groups 287 /// \{ 288 289 /// \brief Appends a read group entry (\@RG) to this header. 290 /// 291 /// \returns reference to this object 292 /// 293 BamHeader& AddReadGroup(ReadGroupInfo readGroup); 294 295 /// \brief Removes all read group entries from this header. 296 /// 297 /// \returns reference to this object 298 /// 299 BamHeader& ClearReadGroups(); 300 301 /// \brief Replaces this header's list of read group entries with those in 302 /// \p readGroups. 303 /// 304 /// \returns reference to this object 305 /// 306 BamHeader& ReadGroups(std::vector<ReadGroupInfo> readGroups); 307 308 /// \} 309 310 public: 311 /// \name Sequences 312 /// \{ 313 314 /// \brief Appends a sequence entry (\@SQ) to this header. 315 /// 316 /// \returns reference to this object 317 /// 318 BamHeader& AddSequence(SequenceInfo sequence); 319 320 /// \brief Removes all sequence entries from this header. 321 /// 322 /// \returns reference to this object 323 /// 324 BamHeader& ClearSequences(); 325 326 /// \brief Replaces this header's list of sequence entries with those in 327 /// \p sequences. 328 /// 329 /// \returns reference to this object 330 /// 331 BamHeader& Sequences(std::vector<SequenceInfo> sequences); 332 333 /// \} 334 335 public: 336 /// \name Programs 337 /// \{ 338 339 /// \brief Appends a program entry (\@PG) to this header. 340 /// 341 /// \returns reference to this object 342 /// 343 BamHeader& AddProgram(ProgramInfo pg); 344 345 /// \brief Removes all program entries from this header. 346 /// 347 /// \returns reference to this object 348 /// 349 BamHeader& ClearPrograms(); 350 351 /// \brief Replaces this header's list of program entries with those in 352 /// \p programs. 353 /// 354 /// \returns reference to this object 355 /// 356 BamHeader& Programs(std::vector<ProgramInfo> programs); 357 358 /// \} 359 360 public: 361 /// \name Comments 362 /// \{ 363 364 /// \brief Appends a comment (\@CO) to this header. 365 /// 366 /// \returns reference to this object 367 /// 368 BamHeader& AddComment(std::string comment); 369 370 /// \brief Removes all comments from this header. 371 /// 372 /// \returns reference to this object 373 /// 374 BamHeader& ClearComments(); 375 376 /// \brief Replaces this header's list of comments with those in \p comments. 377 /// 378 /// \returns reference to this object 379 /// 380 BamHeader& Comments(std::vector<std::string> comments); 381 382 /// \} 383 384 private: 385 std::shared_ptr<internal::BamHeaderPrivate> d_; 386 }; 387 388 } // namespace BAM 389 } // namespace PacBio 390 391 #include "pbbam/internal/BamHeader.inl" 392 393 #endif // BAMHEADER_H 394