1 /*  $Id: seq_id_resolver.hpp 503818 2016-06-08 12:00:05Z holmesbr $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Description:
27 *
*   Helper classes that convert strings to seq-ids (seq-id resolvers),
*   for use when translating HGVS expressions to Variation-ref seq-feats.
29 *   HGVS nomenclature rules: http://www.hgvs.org/mutnomen/
30 *
31 * ===========================================================================
32 */
33 
34 #ifndef SEQ_ID_RESOLVER_HPP_
35 #define SEQ_ID_RESOLVER_HPP_
36 
37 
38 #include <util/ncbi_cache.hpp>
39 #include <objects/seq/seq_id_handle.hpp>
40 #include <objects/genomecoll/GC_Assembly.hpp>
41 #include <objmgr/scope.hpp>
42 
43 
44 BEGIN_NCBI_SCOPE
45 
46 class CRegexp;
47 
48 BEGIN_objects_SCOPE
49 class CEntrez2Client;
50 class CSeq_loc;
51 class CSeq_loc_Mapper;
52 END_objects_SCOPE
53 
54 USING_SCOPE(objects);
55 
56 /// A helper class to convert a string to a seq-id.
57 /// Default implementation assumes a seq-id string, but user may implement
58 /// their own, e.g. to interpret "chr1" based on tax/assembly
59 class CSeq_id_Resolver : public CObject
60 {
61 public:
CSeq_id_Resolver(CScope & scope)62     CSeq_id_Resolver(CScope& scope)
63       : m_scope(&scope)
64       , m_regexp(NULL)
65     {}
66 
67     virtual bool CanCreate(const string& s);
68 
Get(const string & s)69     virtual CSeq_id_Handle Get(const string& s)
70     {
71         if (m_cache.find(s) == m_cache.end()) {
72             m_cache[s] = x_Create(s);
73         }
74         return m_cache[s];
75     }
76 
77     virtual ~CSeq_id_Resolver();
78 
79     typedef list<CRef<CSeq_id_Resolver> > TResolvers;
80     /// Iterate through resolvers and resolve using the first one that can do it
81     /// Return empty handle otherwise.
s_Get(TResolvers & resolvers,const string & s)82     static CSeq_id_Handle s_Get(TResolvers& resolvers, const string& s)
83     {
84         NON_CONST_ITERATE(TResolvers, it, resolvers) {
85             CSeq_id_Resolver& r = **it;
86             if (r.CanCreate(s)) {
87                 return r.Get(s);
88             }
89         }
90         return CSeq_id_Handle();
91     }
92 
93 protected:
94     virtual CSeq_id_Handle x_Create(const string& s);
95     CRef<CScope> m_scope;
96     typedef map<string, CSeq_id_Handle> TCache;
97     TCache m_cache;
98     CRegexp* m_regexp;
99 };
100 
101 /// Resolve LRG seq-ids, e.g. LRG_123, LRG_123t1, LRG_123p1
102 class CSeq_id_Resolver__LRG : public CSeq_id_Resolver
103 {
104 public:
105     CSeq_id_Resolver__LRG(CScope& scope);
~CSeq_id_Resolver__LRG()106     virtual ~CSeq_id_Resolver__LRG() {}
107 private:
108     virtual CSeq_id_Handle x_Create(const string& s);
109 };
110 
111 /// Resolve CCDS-id to an NM
112 class CSeq_id_Resolver__CCDS : public CSeq_id_Resolver
113 {
114 public:
115     CSeq_id_Resolver__CCDS(CScope& scope);
116     virtual ~CSeq_id_Resolver__CCDS();
117 
118 private:
119     virtual CSeq_id_Handle x_Create(const string& s);
120     objects::CEntrez2Client* m_entrez;
121 };
122 
123 /// Resolve chromosome names based on GC_Assembly
124 class CSeq_id_Resolver__ChrNamesFromGC : public CSeq_id_Resolver
125 {
126 public:
127     CSeq_id_Resolver__ChrNamesFromGC(const CGC_Assembly& assembly, CScope& scope);
~CSeq_id_Resolver__ChrNamesFromGC()128     virtual ~CSeq_id_Resolver__ChrNamesFromGC() {}
129 
130     virtual bool CanCreate(const string& s);
131 
132 private:
133     virtual CSeq_id_Handle x_Create(const string& s);
134     virtual CConstRef<CSeq_loc> x_MapLoc(const CSeq_loc& loc) const;
135 
136     typedef map<string, CSeq_id_Handle> TData;
137     TData m_data;
138     mutable CRef<CSeq_loc_Mapper> m_SLMapper;
139 
140     typedef CCache<string, CSeq_id_Handle> TLocCache;
141     auto_ptr<TLocCache> m_loccache;
142 
143 };
144 
145 END_NCBI_SCOPE
146 
147 #endif
148