1 #ifndef NGALIGN_RESULT_SET__HPP 2 #define NGALIGN_RESULT_SET__HPP 3 4 /* $Id: result_set.hpp 499694 2016-04-27 17:23:05Z boukn $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Nathan Bouk 30 * 31 * File Description: 32 * 33 */ 34 35 #include <corelib/ncbistd.hpp> 36 #include <corelib/ncbiobj.hpp> 37 #include <objects/seqloc/Na_strand.hpp> 38 39 #include <objects/seqloc/Seq_loc.hpp> 40 #include <objects/seqloc/Seq_id.hpp> 41 #include <objects/genomecoll/GC_Assembly.hpp> 42 #include <objmgr/scope.hpp> 43 44 #include <algo/align/util/align_filter.hpp> 45 46 47 48 BEGIN_NCBI_SCOPE 49 50 BEGIN_SCOPE(objects) 51 class CScope; 52 class CSeq_align; 53 class CSeq_align_set; 54 END_SCOPE(objects) 55 56 BEGIN_SCOPE(blast) 57 class CSearchResultSet; 58 class CSearchResults; 59 END_SCOPE(blast) 60 61 class CSplitSeqAlignMerger; 62 63 64 // Stores one querys worth of alignments 65 class CQuerySet : public CObject 66 { 67 public: 68 69 typedef map<string, CRef<objects::CSeq_align_set> > TSubjectToAlignSet; 70 typedef map<string, TSubjectToAlignSet> TAssemblyToSubjectSet; 71 72 73 CQuerySet(const blast::CSearchResults& Results); 74 CQuerySet(const objects::CSeq_align_set& Results); 75 CQuerySet(CRef<objects::CSeq_align> Alignment); 76 77 CQuerySet(const blast::CSearchResults& Results, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false); 78 CQuerySet(const objects::CSeq_align_set& Results, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false); 79 CQuerySet(CRef<objects::CSeq_align> Alignment, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false); 80 81 Get()82 TAssemblyToSubjectSet& Get() { return m_AssemblyMap; } Get() const83 const TAssemblyToSubjectSet& Get() const { return m_AssemblyMap; } 84 85 //TSubjectToAlignSet& Get() { return m_SubjectMap; } 86 //const TSubjectToAlignSet& Get() const { return m_SubjectMap; } 87 88 CRef<objects::CSeq_align_set> ToSeqAlignSet() const; 89 CRef<objects::CSeq_align_set> ToBestSeqAlignSet() const; 90 GetQueryId() const91 CConstRef<objects::CSeq_id> GetQueryId() const { return m_QueryId; } 92 93 void Insert(CRef<CQuerySet> QuerySet); 94 void Insert(const objects::CSeq_align_set& AlignSet); 95 void Insert(CRef<objects::CSeq_align> Alignment); 96 97 // gets the rank of the best (lowest) ranked alignment in this query set 98 int GetBestRank(const string AssemblyAcc = "") const; 99 100 private: 101 102 TSubjectToAlignSet m_SubjectMap; 103 CRef<objects::CSeq_id> m_QueryId; 104 105 CRef<objects::CGC_Assembly> m_GenColl; 106 bool m_AllowDupes; 107 108 TAssemblyToSubjectSet m_AssemblyMap; 109 110 bool x_AlreadyContains(const objects::CSeq_align_set& Set, 111 const objects::CSeq_align& New) const; 112 113 void x_FilterStrictSubAligns(objects::CSeq_align_set& Source) const; 114 115 // True if Outer strictly contains Inner 116 bool x_ContainsAlignment(const objects::CSeq_align& Outer, 117 const objects::CSeq_align& Inner) const; 118 }; 119 120 121 class CAlignResultsSet : public CObject 122 { 123 public: 124 typedef map<string, CRef<CQuerySet> > TQueryToSubjectSet; 125 126 CAlignResultsSet(); 127 CAlignResultsSet(bool AllowDupes); 128 CAlignResultsSet(CRef<objects::CGC_Assembly> Gencoll, 129 bool AllowDupes = false); 130 CAlignResultsSet(const blast::CSearchResultSet& BlastResults); 131 Get()132 TQueryToSubjectSet& Get() { return m_QueryMap; } Get() const133 const TQueryToSubjectSet& Get() const { return m_QueryMap; } 134 135 bool QueryExists(const objects::CSeq_id& Id) const; 136 CRef<CQuerySet> GetQuerySet(const objects::CSeq_id& Id); 137 CConstRef<CQuerySet> GetQuerySet(const objects::CSeq_id& Id) const; 138 139 140 CRef<objects::CSeq_align_set> ToSeqAlignSet() const; 141 CRef<objects::CSeq_align_set> ToBestSeqAlignSet() const; 142 143 void Insert(CRef<CQuerySet> QuerySet); 144 void Insert(CRef<CAlignResultsSet> AlignSet); 145 void Insert(const blast::CSearchResultSet& BlastResults); 146 void Insert(CRef<objects::CSeq_align> Alignment); 147 void Insert(const objects::CSeq_align_set& AlignSet); 148 149 150 size() const151 size_t size() const { return m_QueryMap.size(); } empty() const152 bool empty() const { return m_QueryMap.empty(); } 153 154 private: 155 156 bool m_AllowDupes; 157 158 TQueryToSubjectSet m_QueryMap; 159 160 CRef<objects::CGC_Assembly> m_GenColl; 161 162 // the one priveledged case that gets to use DropQuery() 163 164 friend class CSplitSeqAlignMerger; 165 // drops a given query from the result set. 166 // Primarily for Split sequences, once the original is inserted, 167 // drop the split subsequences 168 void DropQuery(const objects::CSeq_id& Id); 169 170 }; 171 172 173 174 175 176 END_NCBI_SCOPE 177 178 #endif 179