1 #ifndef NGALIGN_RESULT_SET__HPP
2 #define NGALIGN_RESULT_SET__HPP
3 
4 /*  $Id: result_set.hpp 499694 2016-04-27 17:23:05Z boukn $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Nathan Bouk
30  *
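 * File Description:
 *   Declares CQuerySet, which holds the alignments for one query, and
 *   CAlignResultsSet, which maps query keys to CQuerySet objects.
 *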
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbiobj.hpp>
37 #include <objects/seqloc/Na_strand.hpp>
38 
39 #include <objects/seqloc/Seq_loc.hpp>
40 #include <objects/seqloc/Seq_id.hpp>
41 #include <objects/genomecoll/GC_Assembly.hpp>
42 #include <objmgr/scope.hpp>
43 
44 #include <algo/align/util/align_filter.hpp>
45 
46 
47 
48 BEGIN_NCBI_SCOPE
49 
// Forward declarations of objects:: types; CSeq_align and CSeq_align_set
// appear in the constructor and Insert() signatures declared in this header.
BEGIN_SCOPE(objects)
    class CScope;
    class CSeq_align;
    class CSeq_align_set;
END_SCOPE(objects)

// Forward declarations of the BLAST result types that this header only
// takes by const reference.
BEGIN_SCOPE(blast)
    class CSearchResultSet;
    class CSearchResults;
END_SCOPE(blast)

// Forward-declared so it can be named in a friend declaration in this header.
class CSplitSeqAlignMerger;
62 
63 
64 // Stores one querys worth of alignments
65 class CQuerySet : public CObject
66 {
67 public:
68 
69     typedef map<string, CRef<objects::CSeq_align_set> > TSubjectToAlignSet;
70     typedef map<string, TSubjectToAlignSet> TAssemblyToSubjectSet;
71 
72 
73     CQuerySet(const blast::CSearchResults& Results);
74     CQuerySet(const objects::CSeq_align_set& Results);
75     CQuerySet(CRef<objects::CSeq_align> Alignment);
76 
77   	CQuerySet(const blast::CSearchResults& Results, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false);
78     CQuerySet(const objects::CSeq_align_set& Results, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false);
79     CQuerySet(CRef<objects::CSeq_align> Alignment, CRef<objects::CGC_Assembly> GenColl, bool AllowDupes=false);
80 
81 
Get()82 	TAssemblyToSubjectSet& Get() { return m_AssemblyMap; }
Get() const83     const TAssemblyToSubjectSet& Get() const { return m_AssemblyMap; }
84 
85     //TSubjectToAlignSet& Get() { return m_SubjectMap; }
86     //const TSubjectToAlignSet& Get() const { return m_SubjectMap; }
87 
88     CRef<objects::CSeq_align_set> ToSeqAlignSet() const;
89     CRef<objects::CSeq_align_set> ToBestSeqAlignSet() const;
90 
GetQueryId() const91     CConstRef<objects::CSeq_id> GetQueryId() const { return m_QueryId; }
92 
93     void Insert(CRef<CQuerySet> QuerySet);
94     void Insert(const objects::CSeq_align_set& AlignSet);
95     void Insert(CRef<objects::CSeq_align> Alignment);
96 
97     // gets the rank of the best (lowest) ranked alignment in this query set
98     int GetBestRank(const string AssemblyAcc = "") const;
99 
100 private:
101 
102     TSubjectToAlignSet m_SubjectMap;
103     CRef<objects::CSeq_id> m_QueryId;
104 
105 	CRef<objects::CGC_Assembly> m_GenColl;
106     bool m_AllowDupes;
107 
108 	TAssemblyToSubjectSet m_AssemblyMap;
109 
110     bool x_AlreadyContains(const objects::CSeq_align_set& Set,
111                            const objects::CSeq_align& New) const;
112 
113     void x_FilterStrictSubAligns(objects::CSeq_align_set& Source) const;
114 
115     // True if Outer strictly contains Inner
116     bool x_ContainsAlignment(const objects::CSeq_align& Outer,
117                              const objects::CSeq_align& Inner) const;
118 };
119 
120 
121 class CAlignResultsSet : public CObject
122 {
123 public:
124     typedef map<string, CRef<CQuerySet> > TQueryToSubjectSet;
125 
126 	CAlignResultsSet();
127 	CAlignResultsSet(bool AllowDupes);
128     CAlignResultsSet(CRef<objects::CGC_Assembly> Gencoll,
129                      bool AllowDupes = false);
130     CAlignResultsSet(const blast::CSearchResultSet& BlastResults);
131 
Get()132     TQueryToSubjectSet& Get() { return m_QueryMap; }
Get() const133     const TQueryToSubjectSet& Get() const { return m_QueryMap; }
134 
135     bool QueryExists(const objects::CSeq_id& Id) const;
136     CRef<CQuerySet> GetQuerySet(const objects::CSeq_id& Id);
137     CConstRef<CQuerySet> GetQuerySet(const objects::CSeq_id& Id) const;
138 
139 
140     CRef<objects::CSeq_align_set> ToSeqAlignSet() const;
141     CRef<objects::CSeq_align_set> ToBestSeqAlignSet() const;
142 
143     void Insert(CRef<CQuerySet> QuerySet);
144     void Insert(CRef<CAlignResultsSet> AlignSet);
145     void Insert(const blast::CSearchResultSet& BlastResults);
146     void Insert(CRef<objects::CSeq_align> Alignment);
147     void Insert(const objects::CSeq_align_set& AlignSet);
148 
149 
150 
size() const151     size_t size() const { return m_QueryMap.size(); }
empty() const152     bool empty() const { return m_QueryMap.empty(); }
153 
154 private:
155 
156     bool m_AllowDupes;
157 
158     TQueryToSubjectSet m_QueryMap;
159 
160     CRef<objects::CGC_Assembly> m_GenColl;
161 
162     // the one priveledged case that gets to use DropQuery()
163 
164     friend class CSplitSeqAlignMerger;
165     // drops a given query from the result set.
166     // Primarily for Split sequences, once the original is inserted,
167     // drop the split subsequences
168     void DropQuery(const objects::CSeq_id& Id);
169 
170 };
171 
172 
173 
174 
175 
176 END_NCBI_SCOPE
177 
178 #endif
179