/* $Id: seq_id_resolver.hpp 503818 2016-06-08 12:00:05Z holmesbr $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act. It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted. This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * File Description:
 *
 *   Translate HGVS expression to Variation-ref seq-feats.
29 * HGVS nomenclature rules: http://www.hgvs.org/mutnomen/ 30 * 31 * =========================================================================== 32 */ 33 34 #ifndef SEQ_ID_RESOLVER_HPP_ 35 #define SEQ_ID_RESOLVER_HPP_ 36 37 38 #include <util/ncbi_cache.hpp> 39 #include <objects/seq/seq_id_handle.hpp> 40 #include <objects/genomecoll/GC_Assembly.hpp> 41 #include <objmgr/scope.hpp> 42 43 44 BEGIN_NCBI_SCOPE 45 46 class CRegexp; 47 48 BEGIN_objects_SCOPE 49 class CEntrez2Client; 50 class CSeq_loc; 51 class CSeq_loc_Mapper; 52 END_objects_SCOPE 53 54 USING_SCOPE(objects); 55 56 /// A helper class to convert a string to a seq-id. 57 /// Default implementation assumes a seq-id string, but user may implement 58 /// their own, e.g. to interpret "chr1" based on tax/assembly 59 class CSeq_id_Resolver : public CObject 60 { 61 public: CSeq_id_Resolver(CScope & scope)62 CSeq_id_Resolver(CScope& scope) 63 : m_scope(&scope) 64 , m_regexp(NULL) 65 {} 66 67 virtual bool CanCreate(const string& s); 68 Get(const string & s)69 virtual CSeq_id_Handle Get(const string& s) 70 { 71 if (m_cache.find(s) == m_cache.end()) { 72 m_cache[s] = x_Create(s); 73 } 74 return m_cache[s]; 75 } 76 77 virtual ~CSeq_id_Resolver(); 78 79 typedef list<CRef<CSeq_id_Resolver> > TResolvers; 80 /// Iterate through resolvers and resolve using the first one that can do it 81 /// Return empty handle otherwise. s_Get(TResolvers & resolvers,const string & s)82 static CSeq_id_Handle s_Get(TResolvers& resolvers, const string& s) 83 { 84 NON_CONST_ITERATE(TResolvers, it, resolvers) { 85 CSeq_id_Resolver& r = **it; 86 if (r.CanCreate(s)) { 87 return r.Get(s); 88 } 89 } 90 return CSeq_id_Handle(); 91 } 92 93 protected: 94 virtual CSeq_id_Handle x_Create(const string& s); 95 CRef<CScope> m_scope; 96 typedef map<string, CSeq_id_Handle> TCache; 97 TCache m_cache; 98 CRegexp* m_regexp; 99 }; 100 101 /// Resolve LRG seq-ids, e.g. 
LRG_123, LRG_123t1, LRG_123p1 102 class CSeq_id_Resolver__LRG : public CSeq_id_Resolver 103 { 104 public: 105 CSeq_id_Resolver__LRG(CScope& scope); ~CSeq_id_Resolver__LRG()106 virtual ~CSeq_id_Resolver__LRG() {} 107 private: 108 virtual CSeq_id_Handle x_Create(const string& s); 109 }; 110 111 /// Resolve CCDS-id to an NM 112 class CSeq_id_Resolver__CCDS : public CSeq_id_Resolver 113 { 114 public: 115 CSeq_id_Resolver__CCDS(CScope& scope); 116 virtual ~CSeq_id_Resolver__CCDS(); 117 118 private: 119 virtual CSeq_id_Handle x_Create(const string& s); 120 objects::CEntrez2Client* m_entrez; 121 }; 122 123 /// Resolve chromosome names based on GC_Assembly 124 class CSeq_id_Resolver__ChrNamesFromGC : public CSeq_id_Resolver 125 { 126 public: 127 CSeq_id_Resolver__ChrNamesFromGC(const CGC_Assembly& assembly, CScope& scope); ~CSeq_id_Resolver__ChrNamesFromGC()128 virtual ~CSeq_id_Resolver__ChrNamesFromGC() {} 129 130 virtual bool CanCreate(const string& s); 131 132 private: 133 virtual CSeq_id_Handle x_Create(const string& s); 134 virtual CConstRef<CSeq_loc> x_MapLoc(const CSeq_loc& loc) const; 135 136 typedef map<string, CSeq_id_Handle> TData; 137 TData m_data; 138 mutable CRef<CSeq_loc_Mapper> m_SLMapper; 139 140 typedef CCache<string, CSeq_id_Handle> TLocCache; 141 auto_ptr<TLocCache> m_loccache; 142 143 }; 144 145 END_NCBI_SCOPE 146 147 #endif 148