1 /*  $Id: compare_feats.hpp 542379 2017-07-31 13:06:45Z dicuccio $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Alex Astashyn
27  *
28  * File Description:
29  *
30  */
31 
32 #ifndef COMPARE_FEATS_HPP_
33 #define COMPARE_FEATS_HPP_
34 
35 #include <vector>
36 #include <map>
37 #include <set>
38 
39 #include <objmgr/scope.hpp>
40 #include <objmgr/feat_ci.hpp>
41 #include <objmgr/util/sequence.hpp>
42 #include <objmgr/util/seq_loc_util.hpp>
43 #include <objmgr/seq_loc_mapper.hpp>
44 #include <objmgr/util/feature.hpp>
45 
46 #include "loc_mapper.hpp"
47 
48 
49 BEGIN_NCBI_SCOPE
50 USING_SCOPE(objects);
51 
52 /// CCompareSeq_locs is used for comparing locations of two features on the same coordinate system
53 /// It is agnostic to what type of feature it is and only compares the internal structure of the locs.
54 class NCBI_XALGOSEQ_EXPORT CCompareSeq_locs : public CObject
55 {
56 public:
57     typedef int TCompareLocsFlags;
58     enum FCompareLocs {
59         fCmp_Unknown                = 1 << 0,  ///< failed to compare
60         fCmp_Incomplete             = 1 << 1,  ///< Some parts of the location could not be compared (e.g. on different sequence)
61         fCmp_NoOverlap              = 1 << 2,  ///< seq_locs do not overlap at all
62         fCmp_RegionOverlap          = 1 << 3,  ///< overlap of the extremes
63         fCmp_Overlap                = 1 << 4,  ///< at least one interval overlaps
64         fCmp_Subset                 = 1 << 5,  ///< comparison loc is a subset of the reference loc; some interval boundaries do not match
65         fCmp_Superset               = 1 << 6,  ///< comparison loc is a superset of the reference loc; some interval boundaries do not match
66         fCmp_intsMissing_internal   = 1 << 7,  ///< comparison loc is missing interval(s) internally
67         fCmp_intsExtra_internal     = 1 << 8,  ///< comparinos loc has extra interval(s) internally
68         fCmp_intsMissing_3p         = 1 << 9,  ///< comparison loc is missing interval(s) at 3' end
69         fCmp_intsExtra_3p           = 1 << 10, ///< comparinos loc has extra interval(s) at 3' end
70         fCmp_intsMissing_5p         = 1 << 11, ///< comparison loc is missing interval(s) at 5' end
71         fCmp_intsExtra_5p           = 1 << 12, ///< comparinos loc has extra interval(s) at 5' end
72         fCmp_3pExtension            = 1 << 13, ///< 3' terminal interval extended (other splice junction matches)
73         fCmp_3pTruncation           = 1 << 14, ///< 3' terminal interval truncated (other splice junction matches)
74         fCmp_5pExtension            = 1 << 15, ///< 5' terminal interval extended (other splice junction matches)
75         fCmp_5pTruncation           = 1 << 16, ///< 5' terminal interval truncated (other splice junction matches)
76         fCmp_StrandDifferent        = 1 << 17, ///< different strand
77         fCmp_FuzzDifferent          = 1 << 18, ///< indicates fuzz mismatch if set
78         fCmp_Match                  = 1 << 19  ///< all junctions match (fuzz-agnostic)
79     };
80 
81     enum EOverlapMethod {
82         eOverlap_vs_Union,    ///< overlap versus the union of the two features
83         eOverlap_vs_Shorter,  ///< overlap versus the shorter of the two features
84         eOverlap_vs_First,    ///< overlap versus the first of the two features
85         eOverlap_vs_Second    ///< overlap versus the second of the two features
86     };
87 
88     typedef int TCompareFlags;
89     enum FCompareFlags {
90         fCmp_IgnoreStrand    = 1 << 0,  ///< strand-less comparison
91         fCmp_Defaults        = 0
92     };
93 
94     //This struct keeps the result of comparison of two exons
95     struct SIntervalComparisonResult : CObject
96     {
97     public:
SIntervalComparisonResultCCompareSeq_locs::SIntervalComparisonResult98         SIntervalComparisonResult(unsigned pos1, unsigned pos2, FCompareLocs result, int pos_comparison = 0)
99             : m_exon_ordinal1(pos1), m_exon_ordinal2(pos2), m_result(result), m_position_comparison(pos_comparison) {}
SIntervalComparisonResultCCompareSeq_locs::SIntervalComparisonResult100         SIntervalComparisonResult() { SIntervalComparisonResult(0, 0, fCmp_Unknown, 0); }
101 
102 
missing_firstCCompareSeq_locs::SIntervalComparisonResult103         inline bool missing_first() const {return m_exon_ordinal1 == 0;}
missing_secondCCompareSeq_locs::SIntervalComparisonResult104         inline bool missing_second() const {return m_exon_ordinal2 == 0;}
105 
106         unsigned m_exon_ordinal1;
107         unsigned m_exon_ordinal2;
108         FCompareLocs m_result;
109         int m_position_comparison; //we need to know which exon is "ahead" so the overlaps can be correctly reported,
110                                    //e.g. 1:>1>  vs. 1:<1<
111     };
112 
CCompareSeq_locs(const CSeq_loc & loc1,const CSeq_loc & loc2,CScope * scope2,TCompareFlags flags=fCmp_Defaults)113     CCompareSeq_locs(const CSeq_loc& loc1, const CSeq_loc& loc2, CScope* scope2, TCompareFlags flags = fCmp_Defaults)
114         : m_scope_t(scope2)
115         , m_flags(flags)
116     {
117         m_loc1 = CRef<CSeq_loc>(new CSeq_loc);
118         m_loc1->Assign(loc1);
119         m_loc2 = CRef<CSeq_loc>(new CSeq_loc);
120         m_loc2->Assign(loc2);
121         if ( m_flags & fCmp_IgnoreStrand ) {
122             m_loc1->ResetStrand();
123             m_loc2->ResetStrand();
124         }
125         this->Reset();
126     }
127 
128     /// Reset cached comparison results
Reset()129     void Reset()
130     {
131         this->m_cachedOverlapValues = false;
132         this->x_Compare();
133     }
134 
135     /// Symmetrical overlap is defined as length(intersection(loc1, loc2) / (length(loc1) + length(loc2))
136     /// intra-loc overlaps are merged (otherwise non-sensical results are possible)
GetSymmetricalOverlap() const137     double GetSymmetricalOverlap() const
138     {
139         return GetOverlap(eOverlap_vs_Union);
140     }
141 
142     /// Relative overlap is defined as ratio of the length of the overlap to the length of the shorter feature
GetRelativeOverlap() const143     double GetRelativeOverlap() const
144     {
145         return GetOverlap(eOverlap_vs_Shorter);
146     }
147 
148     /// Calculate overlap according to the specified method.
GetOverlap(EOverlapMethod method) const149     double GetOverlap(EOverlapMethod method) const
150     {
151         if(!m_cachedOverlapValues) {
152             x_ComputeOverlapValues();
153         }
154 
155         TSeqPos denom = 0;
156 
157         switch(method) {
158             case eOverlap_vs_Union:
159                 denom = m_len_seqloc1 + m_len_seqloc2 - m_len_seqloc_overlap;
160                 break;
161             case eOverlap_vs_Shorter:
162                 denom = static_cast<TSeqPos>(std::min(m_len_seqloc1, m_len_seqloc2));
163                 break;
164             case eOverlap_vs_First:
165                 denom = m_len_seqloc1;
166                 break;
167             case eOverlap_vs_Second:
168                 denom = m_len_seqloc2;
169                 break;
170         }
171 
172         return (denom == 0) ? 0.0 : (static_cast<double>(m_len_seqloc_overlap) / denom);
173     }
174 
GetSplicingSimilarity(float & score,int * loc1_intervals=NULL,int * loc2_intervals=NULL) const175     void GetSplicingSimilarity(float& score, int* loc1_intervals = NULL, int* loc2_intervals = NULL) const
176     {
177         if(!m_cachedOverlapValues) {
178             x_ComputeOverlapValues();
179         }
180 
181         score = m_shared_sites_score;
182         if(loc1_intervals) {
183             *loc1_intervals = m_loc1_interval_count;
184         }
185         if(loc2_intervals) {
186             *loc2_intervals = m_loc2_interval_count;
187         }
188     }
189 
190 
191     /// str_out will contain human-readable summary of the internal comparison
192     TCompareLocsFlags GetResult(string* str_out = NULL) const;
193 
194     /// The evidence string is a whitespace-separated list of exon comparisons
195     /// Each exon comparison is a pair of exon ordinals (on query and target features)
196     /// separated by a colon; target exon ordinal may contain splice junction "operators"
197     /// that establish relationship to the query exon.
198     ///
199     /// '>' and '<' denote the splice junction shifts in 3' and 5' direction respectively
200     /// (relative to the master sequence). E.g.
201     /// '4:4'   = 4th exon matches exactly on both sequneces
202     /// '4:>4>' = 4th exon shifted in 3' direction relative to the overlapping query exon
203     /// '4:<4>' = 4th exon extended in both directions relative to the overlapping query exon
204     /// '4:<4'  = 4th exon has 5' junction extended relative to the overlapping query exon
205     /// etc.
206     ///
207     /// '~' are sentinels for non-overlapping exons. e.g.
208     /// '5:~' = 5th exon on query location is unmatched;
209     /// '~:5' = 5th exon on target location is unmatched.
210     ///
211     /// neighboring exon comparisons of the same class are collapsed in groups:
212     /// '5-20:4-19' = exons 5 to 20 match exons 4 to 19 on target
213     /// '21-23:~'   = exons 21 to 23 do not overlap the target.
214     ///
215     ///
216     /// The exon ordinals are numbered by their position within the feature.
217     string GetEvidenceString() const;
218 
219     /// Return the vector of individual exon comparisons
GetIndividualComparisons() const220     const vector<SIntervalComparisonResult>& GetIndividualComparisons() const
221     {
222         return m_IntComparisons;
223     }
224 
225 private:
226 
227     /// This helper struct is used to accumulate the neighboring comparisons
228     /// of the same class, such that the comparison [... 3:5  4:6 ... 20:22 ...]
229     /// can be represented as [... 3-20:5-22 ...]
230     struct SIntervalComparisonResultGroup
231     {
232     public:
SIntervalComparisonResultGroupCCompareSeq_locs::SIntervalComparisonResultGroup233         SIntervalComparisonResultGroup(bool isReverse)
234             : m_first(0, 0, fCmp_Unknown, 0)
235             , m_last(0, 0, fCmp_Unknown, 0)
236             , m_isReverse(isReverse)
237         {}
238 
239         string ToString();
240 
IsValidCCompareSeq_locs::SIntervalComparisonResultGroup241         bool IsValid() {
242             return !(m_first.m_exon_ordinal1 == 0 && m_first.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal2 == 0);
243         }
244 
ResetCCompareSeq_locs::SIntervalComparisonResultGroup245         void Reset(const SIntervalComparisonResult& r)
246         {
247             m_first = r;
248             m_last = r;
249         }
250 
251         /// if the comparison is neighboring and of the same class, set the terminal compariosn to it
252         /// and return true; otherwise return false
AddCCompareSeq_locs::SIntervalComparisonResultGroup253         bool Add(const SIntervalComparisonResult& r)
254         {
255             if(r.m_position_comparison == m_last.m_position_comparison
256                && r.m_result == m_last.m_result
257                && ((!m_isReverse && r.m_exon_ordinal1 == m_last.m_exon_ordinal1 + 1)
258                   || (m_isReverse && r.m_exon_ordinal1 == m_last.m_exon_ordinal1 - 1)
259                   || (r.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal1 == 0))
260                && ((!m_isReverse && r.m_exon_ordinal2 == m_last.m_exon_ordinal2 + 1)
261                   || (m_isReverse && r.m_exon_ordinal2 == m_last.m_exon_ordinal2 - 1)
262                   || (r.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal2 == 0)))
263             {
264                 m_last = r;
265                 return true;
266             } else {
267                 return false;
268             }
269         }
270 
271     private:
272 
273         SIntervalComparisonResult m_first;
274         SIntervalComparisonResult m_last;
275         bool m_isReverse;
276     };
277 
278 
279     //this struct is jusst a wrapper to keep the counts together
280     struct ResultCounts
281     {
ResultCountsCCompareSeq_locs::ResultCounts282         ResultCounts() :
283             loc1_int(0),
284             loc2_int(0),
285             matched(0),
286             partially_matched(0),
287             unknown(0),
288             extra(0),
289             missing(0),
290             missing_3p(0),
291             extra_3p(0),
292             missing_5p(0),
293             extra_5p(0) {}
294 
missing_internalCCompareSeq_locs::ResultCounts295         inline unsigned missing_internal() const {return missing - (missing_3p + missing_5p); }
extra_internalCCompareSeq_locs::ResultCounts296         inline unsigned extra_internal() const {return extra - (extra_3p + extra_5p); }
297 
298         unsigned loc1_int;
299         unsigned loc2_int;
300         unsigned matched;
301         unsigned partially_matched; //ext|trunc|overlap|subset|superset
302         unsigned unknown;
303         unsigned extra;             //extra exons (5'+internal+3')
304         unsigned missing;           //missing exons (5'+internal+3')
305         unsigned missing_3p;
306         unsigned extra_3p;
307         unsigned missing_5p;
308         unsigned extra_5p;
309     };
310 
311     /// Process the seq_locs and generate the m_IntComparisons vector; Recompute the counts
312     void x_Compare();
313 
314     /// Recompute m_len_seqloc_overlap, m_len_seqloc1, and m_len_seqloc2
315     void x_ComputeOverlapValues() const;
316 
317     /// Compare two exons
318     FCompareLocs x_CompareInts(const CSeq_loc& loc1, const CSeq_loc& loc2) const;
319 
320 
321     ResultCounts m_counts;
322     bool m_sameStrand;
323     bool m_sameBioseq;
324 
325     mutable bool m_cachedOverlapValues;
326     mutable TSeqPos m_len_seqloc_overlap;
327     mutable TSeqPos m_len_seqloc1;
328     mutable TSeqPos m_len_seqloc2;
329     mutable int m_loc1_interval_count;
330     mutable int m_loc2_interval_count;
331     mutable float m_shared_sites_score;
332 
333 
334     vector<SIntervalComparisonResult> m_IntComparisons;
335     CRef<CSeq_loc> m_loc1;
336     CRef<CSeq_loc> m_loc2;
337     CScope* m_scope_t;
338     TCompareFlags m_flags;
339 
340 };
341 
342 
343 
344 
345 
346 /// CCompareFeats represens a result of comparison of two features.
347 /// (CCompareFeats::m_compare stores the actual result)
348 /// These comparisons will be produces by CCompare_Regions
349 class NCBI_XALGOSEQ_EXPORT CCompareFeats : public CObject
350 {
351 public:
CCompareFeats(const CSeq_feat & feat1,const CSeq_loc & feat1_mapped_loc,double mapped_identity,const CSeq_loc & feat1_self_loc,CScope * scope1,const CSeq_feat & feat2,const CSeq_loc & feat2_self_loc,CScope * scope2)352     CCompareFeats(const CSeq_feat& feat1
353                 , const CSeq_loc& feat1_mapped_loc
354                 , double mapped_identity
355                 , const CSeq_loc& feat1_self_loc
356                 , CScope* scope1
357                 , const CSeq_feat& feat2
358                 , const CSeq_loc& feat2_self_loc
359                 , CScope* scope2)
360         : m_feat1(&feat1)
361         , m_feat1_mapped_loc(&feat1_mapped_loc)
362         , m_feat1_self_loc(&feat1_self_loc)
363         , m_scope_q(scope1)
364         , m_feat2(&feat2)
365         , m_feat2_self_loc(&feat2_self_loc)
366         , m_scope_t(scope2)
367         , m_compare(new CCompareSeq_locs(feat1_mapped_loc, feat2_self_loc, scope2)) // feat1_mapped_loc lives in scope2
368         , m_irrelevance(0)
369         , m_mapped_identity(mapped_identity)
370     {}
371 
372 
373     /// No matching feat2
CCompareFeats(const CSeq_feat & feat1,const CSeq_loc & feat1_mapped_loc,double mapped_identity,const CSeq_loc & feat1_self_loc,CScope * scope1)374     CCompareFeats(const CSeq_feat& feat1
375                 , const CSeq_loc& feat1_mapped_loc ///mapped to feat2's coordinate system
376                 , double mapped_identity
377                 , const CSeq_loc& feat1_self_loc
378                 , CScope* scope1)
379         : m_feat1(&feat1)
380         , m_feat1_mapped_loc(&feat1_mapped_loc)
381         , m_feat1_self_loc(&feat1_self_loc)
382         , m_scope_q(scope1)
383         , m_irrelevance(1) //Forward
384         , m_mapped_identity(mapped_identity)
385     {}
386 
387     /// No matching feat1
CCompareFeats(const CSeq_feat & feat2,const CSeq_loc & feat2_self_loc,double mapped_identity,CScope * scope2)388     CCompareFeats(const CSeq_feat& feat2, const CSeq_loc& feat2_self_loc, double mapped_identity, CScope* scope2)
389         : m_feat2(&feat2)
390         , m_feat2_self_loc(&feat2_self_loc)
391         , m_scope_t(scope2)
392         , m_irrelevance(2) //Reverse
393         , m_mapped_identity(mapped_identity)
394     {}
395 
396 
397     friend CNcbiOstream& operator<<(CNcbiOstream& out, const CCompareFeats& cf);
398 
399 
400 
401 
GetMappedIdentity() const402     double GetMappedIdentity() const
403     {
404         return m_mapped_identity;
405     }
406 
407     // Return true iff features being compared are of the same subtype
IsSameSubtype() const408     bool IsSameSubtype() const
409     {
410         return IsMatch()
411             && m_feat1->CanGetData()
412             && m_feat2->CanGetData()
413             && (m_feat1->GetData().GetSubtype() == m_feat2->GetData().GetSubtype());
414     }
415 
IsSameType() const416     bool IsSameType() const
417     {
418         return IsMatch()
419             && m_feat1->CanGetData()
420             && m_feat2->CanGetData()
421             && (m_feat1->GetData().Which() == m_feat2->GetData().Which());
422     }
423 
424     // Return true iff labels are the same and fCmp_Match flag is set in the comparison result
IsIdentical() const425     bool IsIdentical() const
426     {
427         return IsMatch()
428             && (CCompareFeats::s_GetFeatLabel(*m_feat1) == CCompareFeats::s_GetFeatLabel(*m_feat2))
429             && (m_compare->GetResult() & CCompareSeq_locs::fCmp_Match);
430     }
431 
s_GetLocLabel(const CSeq_loc & loc,bool merged=false)432     static string s_GetLocLabel(const CSeq_loc& loc, bool merged = false)
433     {
434         string s = "";
435 
436         if(!merged) {
437             loc.GetLabel(&s);
438         } else {
439             CRef<CSeq_loc> merged_loc = sequence::Seq_loc_Merge(loc, CSeq_loc::fMerge_SingleRange, NULL);
440             merged_loc->GetLabel(&s);
441         }
442         return s;
443     }
444 
s_GetFeatLabel(const CSeq_feat & gene_feat,feature::TFeatLabelFlags type=feature::fFGL_Both)445     static string s_GetFeatLabel(const CSeq_feat& gene_feat, feature::TFeatLabelFlags type = feature::fFGL_Both)
446     {
447         string gene_label = "";
448         feature::GetLabel(gene_feat, &gene_label, type, NULL);
449         return gene_label;
450     }
451 
452 
GetFeatQ() const453     CConstRef<CSeq_feat> GetFeatQ() const {return m_feat1;}
GetFeatT() const454     CConstRef<CSeq_feat> GetFeatT() const {return m_feat2;}
455 
GetMappedLocQ() const456     CConstRef<CSeq_loc> GetMappedLocQ() const {return m_feat1_mapped_loc;}
GetSelfLocQ() const457     CConstRef<CSeq_loc> GetSelfLocQ() const {return m_feat1_self_loc;}
GetSelfLocT() const458     CConstRef<CSeq_loc> GetSelfLocT() const {return m_feat2_self_loc;}
459 
IsMatch() const460     bool IsMatch() const {return !m_feat1.IsNull() && !m_feat2.IsNull();}
GetComparison() const461     CConstRef<CCompareSeq_locs> GetComparison() const {return m_compare;}
462 
463 
GetIrrelevance() const464     int GetIrrelevance() const {return m_irrelevance; }
SetIrrelevance(int val)465     void SetIrrelevance(int  val) {m_irrelevance =val;}
466 private:
467     CConstRef<CSeq_feat> m_feat1;
468     CConstRef<CSeq_loc> m_feat1_mapped_loc;
469     CConstRef<CSeq_loc> m_feat1_self_loc;
470     CScope* m_scope_q;
471 
472 
473     CConstRef<CSeq_feat> m_feat2;
474     CConstRef<CSeq_loc> m_feat2_self_loc;
475     CScope* m_scope_t;
476 
477 
478     CRef<CCompareSeq_locs> m_compare;
479     int m_irrelevance;
480     bool m_unmatched;
481     double m_mapped_identity;
482 };
483 
484 ///////////////////////////////////////////////////////////////////////////////
485 
486 /// Compare multiple feature annotations on the specified seq_locs.
487 class NCBI_XALGOSEQ_EXPORT CCompareSeqRegions : public CObject
488 {
489 public:
490     enum EScoreMethod {
491         eScore_SymmetricPctOverlap      ///< length of overlap / (sum of lengths - overlaps)
492         , eScore_Feat1PctOverlap        ///< length of overlap / (length of 1st feat)
493         , eScore_Feat2PctOverlap        ///< length of overlap / (length of 2nd feat)
494     };
495 
496     enum EComparisonOptions {
497         fSelectBest                 = (1<<0),
498         fMergeExons                 = (1<<1),
499         fDifferentGenesOnly         = (1<<2),
500         fCreateSentinelGenes        = (1<<3),
501         fSameTypeOnly               = (1<<4)
502     };
503 
504     typedef int TComparisonOptions;
505 
506 
CCompareSeqRegions(const CSeq_loc & query_loc,CScope * q_scope,CScope * t_scope,ILocMapper & mapper,const SAnnotSelector & q_sel,const SAnnotSelector & t_sel,const CSeq_id & target_id,TComparisonOptions options=fSelectBest|fMergeExons,EScoreMethod score_method=eScore_SymmetricPctOverlap)507     CCompareSeqRegions(const CSeq_loc& query_loc
508                      , CScope* q_scope
509                      , CScope* t_scope
510                      , ILocMapper& mapper
511                      , const SAnnotSelector& q_sel
512                      , const SAnnotSelector& t_sel
513                      , const CSeq_id& target_id
514                      , TComparisonOptions options = fSelectBest|fMergeExons
515                      , EScoreMethod score_method = eScore_SymmetricPctOverlap)
516         : m_loc_q(&query_loc)
517         , m_scope_q(q_scope)
518         , m_scope_t(t_scope)
519         , m_mapper(&mapper)
520         , m_selector_q(q_sel)
521         , m_selector_t(t_sel)
522         , m_target_id(&target_id)
523         , m_comp_options(options)
524         , m_score_method(score_method)
525         , m_loc_q_ci(*m_scope_q, *m_loc_q, q_sel)
526         , m_already_processed_unmatched_targets(false)
527     {
528 
529         //when initializing the whole_locs, we use maximally large intervals
530         //instead of Whole seqloc type because Seq_loc_Mapper can't digest those
531         //in the case of incomplete scopes, such as pre-locuslink LDS type
532         CRef<CSeq_loc> t_whole_loc(new CSeq_loc);
533         CRef<CSeq_id> t_id(new CSeq_id);
534         t_id->Assign(target_id);
535         t_whole_loc->SetInt().SetId(*t_id);
536         t_whole_loc->SetInt().SetFrom(0);
537         t_whole_loc->SetInt().SetTo(((TSeqPos) (-10)));
538 
539 
540         CRef<CSeq_loc> q_whole_loc(new CSeq_loc);
541         CRef<CSeq_id> q_id(new CSeq_id);
542         q_id->Assign(sequence::GetId(query_loc, 0));
543         q_whole_loc->SetInt().SetId(*q_id);
544         q_whole_loc->SetInt().SetFrom(0);
545         q_whole_loc->SetInt().SetTo(((TSeqPos) (-10)));
546 
547 
548         m_self_mapper_q.Reset(new CSeq_loc_Mapper(*q_whole_loc, *q_whole_loc, m_scope_q));
549         m_self_mapper_t.Reset(new CSeq_loc_Mapper(*t_whole_loc, *t_whole_loc, m_scope_t));
550 
551         m_seen_targets.clear();
552     }
553 
554 
Rewind()555     void Rewind() {
556         m_loc_q_ci.Rewind();
557         m_seen_targets.clear();
558     }
559 
SetOptions()560     TComparisonOptions& SetOptions() {return m_comp_options;}
GetOptions() const561     TComparisonOptions GetOptions() const {return m_comp_options;}
562 
GetQueryLoc() const563     const CSeq_loc& GetQueryLoc() const {return *m_loc_q;}
564 
565     bool NextComparisonGroup(vector<CRef<CCompareFeats> >& v);
566     void SelectMatches(vector<CRef<CCompareFeats> >& v);
567     static int s_GetGeneId(const CSeq_feat& feat);
568 private:
569     void x_GetPutativeMatches(vector<CRef<CCompareFeats> >& v, CConstRef<CSeq_feat> q_feat);
570     CConstRef<CSeq_loc> x_GetSelfLoc(
571             const CSeq_loc& loc,
572             CScope* scope,
573             bool merge_single_range);
574 
575 
576 
577     CConstRef<CSeq_loc> m_loc_q;
578     CScope* m_scope_q;
579     CScope* m_scope_t;
580     CRef<ILocMapper> m_mapper;
581     const SAnnotSelector& m_selector_q;
582     const SAnnotSelector& m_selector_t;
583     CConstRef<CSeq_id> m_target_id;
584     CRef<CSeq_loc_Mapper> m_self_mapper_q;
585     CRef<CSeq_loc_Mapper> m_self_mapper_t;
586     TComparisonOptions m_comp_options;
587     EScoreMethod m_score_method;
588     CFeat_CI m_loc_q_ci;
589     std::set<std::string> m_seen_targets;
590         //loc-labels of all target features that have been compared
591         //(we use it to collect the target features that are not comparable at the end)
592     bool m_already_processed_unmatched_targets;
593 };
594 
595 
596 END_NCBI_SCOPE
597 
598 #endif
599