1 /* $Id: compare_feats.hpp 542379 2017-07-31 13:06:45Z dicuccio $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Alex Astashyn 27 * 28 * File Description: 29 * 30 */ 31 32 #ifndef COMPARE_FEATS_HPP_ 33 #define COMPARE_FEATS_HPP_ 34 35 #include <vector> 36 #include <map> 37 #include <set> 38 39 #include <objmgr/scope.hpp> 40 #include <objmgr/feat_ci.hpp> 41 #include <objmgr/util/sequence.hpp> 42 #include <objmgr/util/seq_loc_util.hpp> 43 #include <objmgr/seq_loc_mapper.hpp> 44 #include <objmgr/util/feature.hpp> 45 46 #include "loc_mapper.hpp" 47 48 49 BEGIN_NCBI_SCOPE 50 USING_SCOPE(objects); 51 52 /// CCompareSeq_locs is used for comparing locations of two features on the same coordinate system 53 /// It is agnostic to what type of feature it is and only compares the internal structure of the locs. 54 class NCBI_XALGOSEQ_EXPORT CCompareSeq_locs : public CObject 55 { 56 public: 57 typedef int TCompareLocsFlags; 58 enum FCompareLocs { 59 fCmp_Unknown = 1 << 0, ///< failed to compare 60 fCmp_Incomplete = 1 << 1, ///< Some parts of the location could not be compared (e.g. on different sequence) 61 fCmp_NoOverlap = 1 << 2, ///< seq_locs do not overlap at all 62 fCmp_RegionOverlap = 1 << 3, ///< overlap of the extremes 63 fCmp_Overlap = 1 << 4, ///< at least one interval overlaps 64 fCmp_Subset = 1 << 5, ///< comparison loc is a subset of the reference loc; some interval boundaries do not match 65 fCmp_Superset = 1 << 6, ///< comparison loc is a superset of the reference loc; some interval boundaries do not match 66 fCmp_intsMissing_internal = 1 << 7, ///< comparison loc is missing interval(s) internally 67 fCmp_intsExtra_internal = 1 << 8, ///< comparinos loc has extra interval(s) internally 68 fCmp_intsMissing_3p = 1 << 9, ///< comparison loc is missing interval(s) at 3' end 69 fCmp_intsExtra_3p = 1 << 10, ///< comparinos loc has extra interval(s) at 3' end 70 fCmp_intsMissing_5p = 1 << 11, ///< comparison loc is missing interval(s) at 5' end 71 fCmp_intsExtra_5p = 1 << 12, ///< comparinos loc has extra interval(s) at 5' end 72 fCmp_3pExtension = 1 << 13, ///< 3' terminal interval extended (other splice junction matches) 73 fCmp_3pTruncation = 1 << 14, ///< 3' terminal interval truncated (other splice junction matches) 74 fCmp_5pExtension = 1 << 15, ///< 5' terminal interval extended (other splice junction matches) 75 fCmp_5pTruncation = 1 << 16, ///< 5' terminal interval truncated (other splice junction matches) 76 fCmp_StrandDifferent = 1 << 17, ///< different strand 77 fCmp_FuzzDifferent = 1 << 18, ///< indicates fuzz mismatch if set 78 fCmp_Match = 1 << 19 ///< all junctions match (fuzz-agnostic) 79 }; 80 81 enum EOverlapMethod { 82 eOverlap_vs_Union, ///< overlap versus the union of the two features 83 eOverlap_vs_Shorter, ///< overlap versus the shorter of the two features 84 eOverlap_vs_First, ///< overlap versus the first of the two features 85 eOverlap_vs_Second ///< overlap versus the second of the two features 86 }; 87 88 typedef int TCompareFlags; 89 enum FCompareFlags { 90 fCmp_IgnoreStrand = 1 << 0, ///< strand-less comparison 91 fCmp_Defaults = 0 92 }; 93 94 //This struct keeps the result of comparison of two exons 95 struct SIntervalComparisonResult : CObject 96 { 97 public: SIntervalComparisonResultCCompareSeq_locs::SIntervalComparisonResult98 SIntervalComparisonResult(unsigned pos1, unsigned pos2, FCompareLocs result, int pos_comparison = 0) 99 : m_exon_ordinal1(pos1), m_exon_ordinal2(pos2), m_result(result), m_position_comparison(pos_comparison) {} SIntervalComparisonResultCCompareSeq_locs::SIntervalComparisonResult100 SIntervalComparisonResult() { SIntervalComparisonResult(0, 0, fCmp_Unknown, 0); } 101 102 missing_firstCCompareSeq_locs::SIntervalComparisonResult103 inline bool missing_first() const {return m_exon_ordinal1 == 0;} missing_secondCCompareSeq_locs::SIntervalComparisonResult104 inline bool missing_second() const {return m_exon_ordinal2 == 0;} 105 106 unsigned m_exon_ordinal1; 107 unsigned m_exon_ordinal2; 108 FCompareLocs m_result; 109 int m_position_comparison; //we need to know which exon is "ahead" so the overlaps can be correctly reported, 110 //e.g. 1:>1> vs. 1:<1< 111 }; 112 CCompareSeq_locs(const CSeq_loc & loc1,const CSeq_loc & loc2,CScope * scope2,TCompareFlags flags=fCmp_Defaults)113 CCompareSeq_locs(const CSeq_loc& loc1, const CSeq_loc& loc2, CScope* scope2, TCompareFlags flags = fCmp_Defaults) 114 : m_scope_t(scope2) 115 , m_flags(flags) 116 { 117 m_loc1 = CRef<CSeq_loc>(new CSeq_loc); 118 m_loc1->Assign(loc1); 119 m_loc2 = CRef<CSeq_loc>(new CSeq_loc); 120 m_loc2->Assign(loc2); 121 if ( m_flags & fCmp_IgnoreStrand ) { 122 m_loc1->ResetStrand(); 123 m_loc2->ResetStrand(); 124 } 125 this->Reset(); 126 } 127 128 /// Reset cached comparison results Reset()129 void Reset() 130 { 131 this->m_cachedOverlapValues = false; 132 this->x_Compare(); 133 } 134 135 /// Symmetrical overlap is defined as length(intersection(loc1, loc2) / (length(loc1) + length(loc2)) 136 /// intra-loc overlaps are merged (otherwise non-sensical results are possible) GetSymmetricalOverlap() const137 double GetSymmetricalOverlap() const 138 { 139 return GetOverlap(eOverlap_vs_Union); 140 } 141 142 /// Relative overlap is defined as ratio of the length of the overlap to the length of the shorter feature GetRelativeOverlap() const143 double GetRelativeOverlap() const 144 { 145 return GetOverlap(eOverlap_vs_Shorter); 146 } 147 148 /// Calculate overlap according to the specified method. GetOverlap(EOverlapMethod method) const149 double GetOverlap(EOverlapMethod method) const 150 { 151 if(!m_cachedOverlapValues) { 152 x_ComputeOverlapValues(); 153 } 154 155 TSeqPos denom = 0; 156 157 switch(method) { 158 case eOverlap_vs_Union: 159 denom = m_len_seqloc1 + m_len_seqloc2 - m_len_seqloc_overlap; 160 break; 161 case eOverlap_vs_Shorter: 162 denom = static_cast<TSeqPos>(std::min(m_len_seqloc1, m_len_seqloc2)); 163 break; 164 case eOverlap_vs_First: 165 denom = m_len_seqloc1; 166 break; 167 case eOverlap_vs_Second: 168 denom = m_len_seqloc2; 169 break; 170 } 171 172 return (denom == 0) ? 0.0 : (static_cast<double>(m_len_seqloc_overlap) / denom); 173 } 174 GetSplicingSimilarity(float & score,int * loc1_intervals=NULL,int * loc2_intervals=NULL) const175 void GetSplicingSimilarity(float& score, int* loc1_intervals = NULL, int* loc2_intervals = NULL) const 176 { 177 if(!m_cachedOverlapValues) { 178 x_ComputeOverlapValues(); 179 } 180 181 score = m_shared_sites_score; 182 if(loc1_intervals) { 183 *loc1_intervals = m_loc1_interval_count; 184 } 185 if(loc2_intervals) { 186 *loc2_intervals = m_loc2_interval_count; 187 } 188 } 189 190 191 /// str_out will contain human-readable summary of the internal comparison 192 TCompareLocsFlags GetResult(string* str_out = NULL) const; 193 194 /// The evidence string is a whitespace-separated list of exon comparisons 195 /// Each exon comparison is a pair of exon ordinals (on query and target features) 196 /// separated by a colon; target exon ordinal may contain splice junction "operators" 197 /// that establish relationship to the query exon. 198 /// 199 /// '>' and '<' denote the splice junction shifts in 3' and 5' direction respectively 200 /// (relative to the master sequence). E.g. 201 /// '4:4' = 4th exon matches exactly on both sequneces 202 /// '4:>4>' = 4th exon shifted in 3' direction relative to the overlapping query exon 203 /// '4:<4>' = 4th exon extended in both directions relative to the overlapping query exon 204 /// '4:<4' = 4th exon has 5' junction extended relative to the overlapping query exon 205 /// etc. 206 /// 207 /// '~' are sentinels for non-overlapping exons. e.g. 208 /// '5:~' = 5th exon on query location is unmatched; 209 /// '~:5' = 5th exon on target location is unmatched. 210 /// 211 /// neighboring exon comparisons of the same class are collapsed in groups: 212 /// '5-20:4-19' = exons 5 to 20 match exons 4 to 19 on target 213 /// '21-23:~' = exons 21 to 23 do not overlap the target. 214 /// 215 /// 216 /// The exon ordinals are numbered by their position within the feature. 217 string GetEvidenceString() const; 218 219 /// Return the vector of individual exon comparisons GetIndividualComparisons() const220 const vector<SIntervalComparisonResult>& GetIndividualComparisons() const 221 { 222 return m_IntComparisons; 223 } 224 225 private: 226 227 /// This helper struct is used to accumulate the neighboring comparisons 228 /// of the same class, such that the comparison [... 3:5 4:6 ... 20:22 ...] 229 /// can be represented as [... 3-20:5-22 ...] 230 struct SIntervalComparisonResultGroup 231 { 232 public: SIntervalComparisonResultGroupCCompareSeq_locs::SIntervalComparisonResultGroup233 SIntervalComparisonResultGroup(bool isReverse) 234 : m_first(0, 0, fCmp_Unknown, 0) 235 , m_last(0, 0, fCmp_Unknown, 0) 236 , m_isReverse(isReverse) 237 {} 238 239 string ToString(); 240 IsValidCCompareSeq_locs::SIntervalComparisonResultGroup241 bool IsValid() { 242 return !(m_first.m_exon_ordinal1 == 0 && m_first.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal2 == 0); 243 } 244 ResetCCompareSeq_locs::SIntervalComparisonResultGroup245 void Reset(const SIntervalComparisonResult& r) 246 { 247 m_first = r; 248 m_last = r; 249 } 250 251 /// if the comparison is neighboring and of the same class, set the terminal compariosn to it 252 /// and return true; otherwise return false AddCCompareSeq_locs::SIntervalComparisonResultGroup253 bool Add(const SIntervalComparisonResult& r) 254 { 255 if(r.m_position_comparison == m_last.m_position_comparison 256 && r.m_result == m_last.m_result 257 && ((!m_isReverse && r.m_exon_ordinal1 == m_last.m_exon_ordinal1 + 1) 258 || (m_isReverse && r.m_exon_ordinal1 == m_last.m_exon_ordinal1 - 1) 259 || (r.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal1 == 0)) 260 && ((!m_isReverse && r.m_exon_ordinal2 == m_last.m_exon_ordinal2 + 1) 261 || (m_isReverse && r.m_exon_ordinal2 == m_last.m_exon_ordinal2 - 1) 262 || (r.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal2 == 0))) 263 { 264 m_last = r; 265 return true; 266 } else { 267 return false; 268 } 269 } 270 271 private: 272 273 SIntervalComparisonResult m_first; 274 SIntervalComparisonResult m_last; 275 bool m_isReverse; 276 }; 277 278 279 //this struct is jusst a wrapper to keep the counts together 280 struct ResultCounts 281 { ResultCountsCCompareSeq_locs::ResultCounts282 ResultCounts() : 283 loc1_int(0), 284 loc2_int(0), 285 matched(0), 286 partially_matched(0), 287 unknown(0), 288 extra(0), 289 missing(0), 290 missing_3p(0), 291 extra_3p(0), 292 missing_5p(0), 293 extra_5p(0) {} 294 missing_internalCCompareSeq_locs::ResultCounts295 inline unsigned missing_internal() const {return missing - (missing_3p + missing_5p); } extra_internalCCompareSeq_locs::ResultCounts296 inline unsigned extra_internal() const {return extra - (extra_3p + extra_5p); } 297 298 unsigned loc1_int; 299 unsigned loc2_int; 300 unsigned matched; 301 unsigned partially_matched; //ext|trunc|overlap|subset|superset 302 unsigned unknown; 303 unsigned extra; //extra exons (5'+internal+3') 304 unsigned missing; //missing exons (5'+internal+3') 305 unsigned missing_3p; 306 unsigned extra_3p; 307 unsigned missing_5p; 308 unsigned extra_5p; 309 }; 310 311 /// Process the seq_locs and generate the m_IntComparisons vector; Recompute the counts 312 void x_Compare(); 313 314 /// Recompute m_len_seqloc_overlap, m_len_seqloc1, and m_len_seqloc2 315 void x_ComputeOverlapValues() const; 316 317 /// Compare two exons 318 FCompareLocs x_CompareInts(const CSeq_loc& loc1, const CSeq_loc& loc2) const; 319 320 321 ResultCounts m_counts; 322 bool m_sameStrand; 323 bool m_sameBioseq; 324 325 mutable bool m_cachedOverlapValues; 326 mutable TSeqPos m_len_seqloc_overlap; 327 mutable TSeqPos m_len_seqloc1; 328 mutable TSeqPos m_len_seqloc2; 329 mutable int m_loc1_interval_count; 330 mutable int m_loc2_interval_count; 331 mutable float m_shared_sites_score; 332 333 334 vector<SIntervalComparisonResult> m_IntComparisons; 335 CRef<CSeq_loc> m_loc1; 336 CRef<CSeq_loc> m_loc2; 337 CScope* m_scope_t; 338 TCompareFlags m_flags; 339 340 }; 341 342 343 344 345 346 /// CCompareFeats represens a result of comparison of two features. 347 /// (CCompareFeats::m_compare stores the actual result) 348 /// These comparisons will be produces by CCompare_Regions 349 class NCBI_XALGOSEQ_EXPORT CCompareFeats : public CObject 350 { 351 public: CCompareFeats(const CSeq_feat & feat1,const CSeq_loc & feat1_mapped_loc,double mapped_identity,const CSeq_loc & feat1_self_loc,CScope * scope1,const CSeq_feat & feat2,const CSeq_loc & feat2_self_loc,CScope * scope2)352 CCompareFeats(const CSeq_feat& feat1 353 , const CSeq_loc& feat1_mapped_loc 354 , double mapped_identity 355 , const CSeq_loc& feat1_self_loc 356 , CScope* scope1 357 , const CSeq_feat& feat2 358 , const CSeq_loc& feat2_self_loc 359 , CScope* scope2) 360 : m_feat1(&feat1) 361 , m_feat1_mapped_loc(&feat1_mapped_loc) 362 , m_feat1_self_loc(&feat1_self_loc) 363 , m_scope_q(scope1) 364 , m_feat2(&feat2) 365 , m_feat2_self_loc(&feat2_self_loc) 366 , m_scope_t(scope2) 367 , m_compare(new CCompareSeq_locs(feat1_mapped_loc, feat2_self_loc, scope2)) // feat1_mapped_loc lives in scope2 368 , m_irrelevance(0) 369 , m_mapped_identity(mapped_identity) 370 {} 371 372 373 /// No matching feat2 CCompareFeats(const CSeq_feat & feat1,const CSeq_loc & feat1_mapped_loc,double mapped_identity,const CSeq_loc & feat1_self_loc,CScope * scope1)374 CCompareFeats(const CSeq_feat& feat1 375 , const CSeq_loc& feat1_mapped_loc ///mapped to feat2's coordinate system 376 , double mapped_identity 377 , const CSeq_loc& feat1_self_loc 378 , CScope* scope1) 379 : m_feat1(&feat1) 380 , m_feat1_mapped_loc(&feat1_mapped_loc) 381 , m_feat1_self_loc(&feat1_self_loc) 382 , m_scope_q(scope1) 383 , m_irrelevance(1) //Forward 384 , m_mapped_identity(mapped_identity) 385 {} 386 387 /// No matching feat1 CCompareFeats(const CSeq_feat & feat2,const CSeq_loc & feat2_self_loc,double mapped_identity,CScope * scope2)388 CCompareFeats(const CSeq_feat& feat2, const CSeq_loc& feat2_self_loc, double mapped_identity, CScope* scope2) 389 : m_feat2(&feat2) 390 , m_feat2_self_loc(&feat2_self_loc) 391 , m_scope_t(scope2) 392 , m_irrelevance(2) //Reverse 393 , m_mapped_identity(mapped_identity) 394 {} 395 396 397 friend CNcbiOstream& operator<<(CNcbiOstream& out, const CCompareFeats& cf); 398 399 400 401 GetMappedIdentity() const402 double GetMappedIdentity() const 403 { 404 return m_mapped_identity; 405 } 406 407 // Return true iff features being compared are of the same subtype IsSameSubtype() const408 bool IsSameSubtype() const 409 { 410 return IsMatch() 411 && m_feat1->CanGetData() 412 && m_feat2->CanGetData() 413 && (m_feat1->GetData().GetSubtype() == m_feat2->GetData().GetSubtype()); 414 } 415 IsSameType() const416 bool IsSameType() const 417 { 418 return IsMatch() 419 && m_feat1->CanGetData() 420 && m_feat2->CanGetData() 421 && (m_feat1->GetData().Which() == m_feat2->GetData().Which()); 422 } 423 424 // Return true iff labels are the same and fCmp_Match flag is set in the comparison result IsIdentical() const425 bool IsIdentical() const 426 { 427 return IsMatch() 428 && (CCompareFeats::s_GetFeatLabel(*m_feat1) == CCompareFeats::s_GetFeatLabel(*m_feat2)) 429 && (m_compare->GetResult() & CCompareSeq_locs::fCmp_Match); 430 } 431 s_GetLocLabel(const CSeq_loc & loc,bool merged=false)432 static string s_GetLocLabel(const CSeq_loc& loc, bool merged = false) 433 { 434 string s = ""; 435 436 if(!merged) { 437 loc.GetLabel(&s); 438 } else { 439 CRef<CSeq_loc> merged_loc = sequence::Seq_loc_Merge(loc, CSeq_loc::fMerge_SingleRange, NULL); 440 merged_loc->GetLabel(&s); 441 } 442 return s; 443 } 444 s_GetFeatLabel(const CSeq_feat & gene_feat,feature::TFeatLabelFlags type=feature::fFGL_Both)445 static string s_GetFeatLabel(const CSeq_feat& gene_feat, feature::TFeatLabelFlags type = feature::fFGL_Both) 446 { 447 string gene_label = ""; 448 feature::GetLabel(gene_feat, &gene_label, type, NULL); 449 return gene_label; 450 } 451 452 GetFeatQ() const453 CConstRef<CSeq_feat> GetFeatQ() const {return m_feat1;} GetFeatT() const454 CConstRef<CSeq_feat> GetFeatT() const {return m_feat2;} 455 GetMappedLocQ() const456 CConstRef<CSeq_loc> GetMappedLocQ() const {return m_feat1_mapped_loc;} GetSelfLocQ() const457 CConstRef<CSeq_loc> GetSelfLocQ() const {return m_feat1_self_loc;} GetSelfLocT() const458 CConstRef<CSeq_loc> GetSelfLocT() const {return m_feat2_self_loc;} 459 IsMatch() const460 bool IsMatch() const {return !m_feat1.IsNull() && !m_feat2.IsNull();} GetComparison() const461 CConstRef<CCompareSeq_locs> GetComparison() const {return m_compare;} 462 463 GetIrrelevance() const464 int GetIrrelevance() const {return m_irrelevance; } SetIrrelevance(int val)465 void SetIrrelevance(int val) {m_irrelevance =val;} 466 private: 467 CConstRef<CSeq_feat> m_feat1; 468 CConstRef<CSeq_loc> m_feat1_mapped_loc; 469 CConstRef<CSeq_loc> m_feat1_self_loc; 470 CScope* m_scope_q; 471 472 473 CConstRef<CSeq_feat> m_feat2; 474 CConstRef<CSeq_loc> m_feat2_self_loc; 475 CScope* m_scope_t; 476 477 478 CRef<CCompareSeq_locs> m_compare; 479 int m_irrelevance; 480 bool m_unmatched; 481 double m_mapped_identity; 482 }; 483 484 /////////////////////////////////////////////////////////////////////////////// 485 486 /// Compare multiple feature annotations on the specified seq_locs. 487 class NCBI_XALGOSEQ_EXPORT CCompareSeqRegions : public CObject 488 { 489 public: 490 enum EScoreMethod { 491 eScore_SymmetricPctOverlap ///< length of overlap / (sum of lengths - overlaps) 492 , eScore_Feat1PctOverlap ///< length of overlap / (length of 1st feat) 493 , eScore_Feat2PctOverlap ///< length of overlap / (length of 2nd feat) 494 }; 495 496 enum EComparisonOptions { 497 fSelectBest = (1<<0), 498 fMergeExons = (1<<1), 499 fDifferentGenesOnly = (1<<2), 500 fCreateSentinelGenes = (1<<3), 501 fSameTypeOnly = (1<<4) 502 }; 503 504 typedef int TComparisonOptions; 505 506 CCompareSeqRegions(const CSeq_loc & query_loc,CScope * q_scope,CScope * t_scope,ILocMapper & mapper,const SAnnotSelector & q_sel,const SAnnotSelector & t_sel,const CSeq_id & target_id,TComparisonOptions options=fSelectBest|fMergeExons,EScoreMethod score_method=eScore_SymmetricPctOverlap)507 CCompareSeqRegions(const CSeq_loc& query_loc 508 , CScope* q_scope 509 , CScope* t_scope 510 , ILocMapper& mapper 511 , const SAnnotSelector& q_sel 512 , const SAnnotSelector& t_sel 513 , const CSeq_id& target_id 514 , TComparisonOptions options = fSelectBest|fMergeExons 515 , EScoreMethod score_method = eScore_SymmetricPctOverlap) 516 : m_loc_q(&query_loc) 517 , m_scope_q(q_scope) 518 , m_scope_t(t_scope) 519 , m_mapper(&mapper) 520 , m_selector_q(q_sel) 521 , m_selector_t(t_sel) 522 , m_target_id(&target_id) 523 , m_comp_options(options) 524 , m_score_method(score_method) 525 , m_loc_q_ci(*m_scope_q, *m_loc_q, q_sel) 526 , m_already_processed_unmatched_targets(false) 527 { 528 529 //when initializing the whole_locs, we use maximally large intervals 530 //instead of Whole seqloc type because Seq_loc_Mapper can't digest those 531 //in the case of incomplete scopes, such as pre-locuslink LDS type 532 CRef<CSeq_loc> t_whole_loc(new CSeq_loc); 533 CRef<CSeq_id> t_id(new CSeq_id); 534 t_id->Assign(target_id); 535 t_whole_loc->SetInt().SetId(*t_id); 536 t_whole_loc->SetInt().SetFrom(0); 537 t_whole_loc->SetInt().SetTo(((TSeqPos) (-10))); 538 539 540 CRef<CSeq_loc> q_whole_loc(new CSeq_loc); 541 CRef<CSeq_id> q_id(new CSeq_id); 542 q_id->Assign(sequence::GetId(query_loc, 0)); 543 q_whole_loc->SetInt().SetId(*q_id); 544 q_whole_loc->SetInt().SetFrom(0); 545 q_whole_loc->SetInt().SetTo(((TSeqPos) (-10))); 546 547 548 m_self_mapper_q.Reset(new CSeq_loc_Mapper(*q_whole_loc, *q_whole_loc, m_scope_q)); 549 m_self_mapper_t.Reset(new CSeq_loc_Mapper(*t_whole_loc, *t_whole_loc, m_scope_t)); 550 551 m_seen_targets.clear(); 552 } 553 554 Rewind()555 void Rewind() { 556 m_loc_q_ci.Rewind(); 557 m_seen_targets.clear(); 558 } 559 SetOptions()560 TComparisonOptions& SetOptions() {return m_comp_options;} GetOptions() const561 TComparisonOptions GetOptions() const {return m_comp_options;} 562 GetQueryLoc() const563 const CSeq_loc& GetQueryLoc() const {return *m_loc_q;} 564 565 bool NextComparisonGroup(vector<CRef<CCompareFeats> >& v); 566 void SelectMatches(vector<CRef<CCompareFeats> >& v); 567 static int s_GetGeneId(const CSeq_feat& feat); 568 private: 569 void x_GetPutativeMatches(vector<CRef<CCompareFeats> >& v, CConstRef<CSeq_feat> q_feat); 570 CConstRef<CSeq_loc> x_GetSelfLoc( 571 const CSeq_loc& loc, 572 CScope* scope, 573 bool merge_single_range); 574 575 576 577 CConstRef<CSeq_loc> m_loc_q; 578 CScope* m_scope_q; 579 CScope* m_scope_t; 580 CRef<ILocMapper> m_mapper; 581 const SAnnotSelector& m_selector_q; 582 const SAnnotSelector& m_selector_t; 583 CConstRef<CSeq_id> m_target_id; 584 CRef<CSeq_loc_Mapper> m_self_mapper_q; 585 CRef<CSeq_loc_Mapper> m_self_mapper_t; 586 TComparisonOptions m_comp_options; 587 EScoreMethod m_score_method; 588 CFeat_CI m_loc_q_ci; 589 std::set<std::string> m_seen_targets; 590 //loc-labels of all target features that have been compared 591 //(we use it to collect the target features that are not comparable at the end) 592 bool m_already_processed_unmatched_targets; 593 }; 594 595 596 END_NCBI_SCOPE 597 598 #endif 599