1 /* $Id: agp_seq_entry.hpp 632526 2021-06-02 17:25:01Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Mike DiCuccio, Michael Kornbluh 27 * 28 * File Description: 29 * Convert an AGP file into a vector of Seq-entries 30 * 31 */ 32 33 #ifndef __OBJTOOLS_READERS_AGP_SEQ_ENTRY__HPP_ 34 #define __OBJTOOLS_READERS_AGP_SEQ_ENTRY__HPP_ 35 36 #include <objtools/readers/agp_util.hpp> 37 38 #include <objects/seq/Bioseq.hpp> 39 40 BEGIN_NCBI_SCOPE 41 42 namespace objects { 43 class CBioseq; 44 class CSeq_entry; 45 class CSeq_id; 46 class CSeq_gap; 47 } 48 49 /// This class is used to turn an AGP file into a vector of Seq-entry's 50 class NCBI_XOBJREAD_EXPORT CAgpToSeqEntry : public CAgpReader { 51 public: 52 53 /// This is the way the results will be returned 54 /// Each Seq-entry contains just one Bioseq, built from the AGP file(s). 55 typedef vector< CRef<objects::CSeq_entry> > TSeqEntryRefVec; 56 57 enum EFlags { 58 /// Found gaps will not be given Seq-data such as Type and Linkage 59 fSetSeqGap = (1 << 0), 60 /// All IDs will be treated as local IDs. 61 /// The default if this is NOT set is to first try to parse the ID, 62 /// and only make local if parsing fails. 63 fForceLocalId = (1 << 1) 64 }; 65 typedef int TFlags; 66 67 /// After construction, you probably want to do something like 68 /// call ReadStream and then GetResult. 69 /// 70 /// @param agp_version 71 /// What is the AGP version of the input? Default is to auto-detect AGP version, 72 /// which is likely what the user wants to do most of the time. 73 CAgpToSeqEntry( TFlags fFlags = 0, 74 EAgpVersion agp_version = eAgpVersion_auto, 75 CAgpErr* arg = NULL ); 76 77 /// This gets the results found, but don't call before finalizing. We are intentionally 78 /// giving a non-const reference because the caller is free to 79 /// take the seq-entries inside and do whatever they like with them. 80 /// Each Seq-entry contains just one Bioseq, built from the AGP file(s). GetResult(void)81 TSeqEntryRefVec & GetResult(void) { return m_entries; } 82 83 /// This is the default method used to turn strings into Seq-ids in AGP contexts. 84 /// 85 /// @sa x_GetSeqIdFromStr 86 static CRef<objects::CSeq_id> s_DefaultSeqIdFromStr( const std::string & str ); 87 88 /// Turn a string into a local Seq-id (removing "lcl|" from the beginning if needed) 89 static CRef<objects::CSeq_id> s_LocalSeqIdFromStr( const std::string & str ); 90 91 protected: 92 93 const TFlags m_fFlags; 94 95 /// Builds new part of delta-seq in current bioseq, or adds bioseq 96 /// and starts building a new one. 97 virtual void OnGapOrComponent(void); 98 99 /// Parent finalize plus making sure last m_bioseq is added. 100 virtual int Finalize(void); 101 102 /// Our own finalization after parent's finalization. 103 void x_FinishedBioseq(void); 104 105 /// If you must change exactly how strings are turned into Seq-ids, 106 /// you can override this in a subclass. The default 107 // is to use s_DefaultSeqIdFromStr. 108 virtual CRef<objects::CSeq_id> x_GetSeqIdFromStr( const std::string & str ); 109 110 /// Fills in out_gap_info based on current CAgpRow 111 void x_SetSeqGap( objects::CSeq_gap & out_gap_info ); 112 113 /// This is the bioseq currently being built 114 CRef<objects::CBioseq> m_bioseq; 115 /// Holds the results 116 vector< CRef<objects::CSeq_entry> > m_entries; 117 118 private: 119 120 // forbid copy and assignment 121 CAgpToSeqEntry( const CAgpToSeqEntry & ); 122 CAgpToSeqEntry & operator = (const CAgpToSeqEntry & ); 123 }; 124 125 END_NCBI_SCOPE 126 127 #endif // end of "include-guard" 128