1 #ifndef OLIGOFAR_CSEQIDS__HPP
2 #define OLIGOFAR_CSEQIDS__HPP
3
4 #include "defs.hpp"
5 #include <objects/seqloc/Seq_id.hpp>
6 #include <deque>
7 #include <map>
8 #include <set>
9
10 BEGIN_OLIGOFAR_SCOPES
11
12 class CSeqIds
13 {
14 public:
15 typedef objects::CBioseq::TId TIds;
16 typedef objects::CSeq_id TSeqId;
17
18 class CSeqDef
19 {
20 public:
21 typedef set<string> TIdSet;
CSeqDef(int oid=-1)22 CSeqDef( int oid = -1 ) : m_oid( oid ) {}
CSeqDef(int oid,const TIds & ids)23 CSeqDef( int oid, const TIds& ids ) : m_oid( oid ) { AddIds( ids ); }
GetIdStrings() const24 const TIdSet& GetIdStrings() const { return m_ids; }
25 string GetBestIdString() const;
26 string GetFullIdString() const;
HasId(const TSeqId & seqId) const27 bool HasId( const TSeqId& seqId ) const { return HasId( seqId.AsFastaString() ); }
HasId(const string & seqId) const28 bool HasId( const string& seqId ) const { return m_ids.find( seqId ) != m_ids.end(); }
AddId(const TSeqId & seqId)29 void AddId( const TSeqId& seqId ) { AddId( seqId.AsFastaString() ); }
AddId(const string & seqId)30 void AddId( const string& seqId ) { m_ids.insert( seqId ); }
31 void AddIds( const string& seqIds );
32 void AddIds( const TIds& ids );
33 bool HasAnyId( const TIds& ids ) const;
34 bool HasAnyId( const string& ids ) const;
HasOid() const35 bool HasOid() const { return m_oid >= 0; }
36 void SetOid( int oid );
37 int GetOid() const;
38 protected:
39 bool m_oidSet;
40 int m_oid;
41 TIdSet m_ids;
42 };
43
44 typedef map<string,int> TSeqId2Ord;
45 typedef deque<CSeqDef> TOrd2SeqIds;
46 typedef map<int,int> TOid2Ord;
47
CSeqIds()48 CSeqIds() : m_lastOrd( -1 ) {}
49
Clear()50 void Clear() { m_oid2ord.clear(); m_seqid2ord.clear(); m_ord2seqids.clear(); m_lastOrd = -1; }
IsEmpty() const51 bool IsEmpty() const { return m_ord2seqids.empty(); }
52 int Register( const TIds& ids, int oid = -1 );
53 int Register( const string& ids, int oid = -1 );
Register(int oid)54 int Register( int oid ) { TIds ids; return Register( ids, oid ); }
55 const CSeqDef& GetSeqDef( int ord ) const;
56
57 int GetOrdById( const TIds& ids ) const;
58 int GetOrdByOid( int oid ) const;
59
60 // following function is used to avoid calling GetOrdById after Register in independend modules
GetLastOrd() const61 int GetLastOrd() const { ASSERT( m_lastOrd != -1 ); return m_lastOrd; }
62 protected:
63 TOrd2SeqIds m_ord2seqids;
64 TSeqId2Ord m_seqid2ord;
65 TOid2Ord m_oid2ord;
66
67 int m_lastOrd;
68 };
69
GetBestIdString() const70 inline string CSeqIds::CSeqDef::GetBestIdString() const
71 {
72 if( m_ids.size() ) return *m_ids.begin();
73 else return "UNKNOWN";
74 }
75
GetFullIdString() const76 inline string CSeqIds::CSeqDef::GetFullIdString() const
77 {
78 if( m_ids.size() ) {
79 string out;
80 ITERATE( TIdSet, id, m_ids ) {
81 if( id != m_ids.begin() ) out += "|";
82 out += *id;
83 }
84 return out;
85 } else return "UNKNOWN";
86 }
87
AddIds(const string & seqIds)88 inline void CSeqIds::CSeqDef::AddIds( const string& seqIds )
89 {
90 TIds ids;
91 objects::CSeq_id::ParseFastaIds( ids, seqIds );
92 AddIds( ids );
93 }
94
AddIds(const TIds & ids)95 inline void CSeqIds::CSeqDef::AddIds( const TIds& ids )
96 {
97 ITERATE( TIds, id, ids ) AddId( **id );
98 }
99
HasAnyId(const TIds & ids) const100 inline bool CSeqIds::CSeqDef::HasAnyId( const TIds& ids ) const
101 {
102 ITERATE( TIds, id, ids ) if( HasId( **id ) ) return true;
103 return false;
104 }
105
HasAnyId(const string & seqIds) const106 inline bool CSeqIds::CSeqDef::HasAnyId( const string& seqIds ) const
107 {
108 TIds ids;
109 objects::CSeq_id::ParseFastaIds( ids, seqIds );
110 return HasAnyId( ids );
111 }
112
SetOid(int oid)113 inline void CSeqIds::CSeqDef::SetOid( int oid )
114 {
115 if( oid < 0 )
116 THROW( logic_error, "Attempt to set invalid oid " << oid << " for " << GetFullIdString() );
117 if( HasOid() && oid != m_oid )
118 THROW( logic_error, "Can't redefine oid for " << GetFullIdString() << " from " << m_oid << " to " << oid );
119 m_oid = oid;
120 }
121
GetOid() const122 inline int CSeqIds::CSeqDef::GetOid() const
123 {
124 if( !HasOid() )
125 THROW( logic_error, "Attempt to get oid when it is not set for " << GetFullIdString() );
126 return m_oid;
127 }
128
Register(const TIds & ids,int oid)129 inline int CSeqIds::Register( const TIds& ids, int oid )
130 {
131 ITERATE( TIds, i, ids ) {
132 string id( (*i)->AsFastaString() );
133 TSeqId2Ord::iterator x = m_seqid2ord.find( id );
134 if( x != m_seqid2ord.end() ) {
135 int ord = x->second;
136 m_ord2seqids[ord].AddIds( ids );
137 if( oid >= 0 ) {
138 m_ord2seqids[ord].SetOid( oid );
139 m_oid2ord.insert( make_pair( oid, ord ) );
140 }
141 return m_lastOrd = ord;
142 }
143 }
144 if( oid >= 0 ) {
145 TOid2Ord::iterator i = m_oid2ord.find( oid );
146 if( i != m_oid2ord.end() ) {
147 int ord = i->second;
148 m_ord2seqids[ord].AddIds( ids );
149 return m_lastOrd = ord;
150 }
151 }
152 ASSERT( ids.size() || oid >= 0 );
153 int ord = m_ord2seqids.size();
154 m_ord2seqids.push_back( CSeqDef( oid, ids ) );
155 ITERATE( TIds, i, ids )
156 m_seqid2ord.insert( make_pair( (*i)->AsFastaString(), ord ) );
157 if( oid >= 0 ) m_oid2ord.insert( make_pair( oid, ord ) );
158 return m_lastOrd = ord;
159 }
160
Register(const string & idstr,int oid)161 inline int CSeqIds::Register( const string& idstr, int oid )
162 {
163 TIds ids;
164 objects::CSeq_id::ParseFastaIds( ids, idstr );
165 return Register( ids, oid );
166 }
167
GetSeqDef(int ord) const168 inline const CSeqIds::CSeqDef& CSeqIds::GetSeqDef( int ord ) const
169 {
170 ASSERT( ord >= 0 && ord < (int)m_ord2seqids.size() );
171 return m_ord2seqids[ord];
172 }
173
GetOrdById(const TIds & ids) const174 inline int CSeqIds::GetOrdById( const TIds& ids ) const
175 {
176 ITERATE( TIds, i, ids ) {
177 string id( (*i)->AsFastaString() );
178 TSeqId2Ord::const_iterator x = m_seqid2ord.find( id );
179 if( x != m_seqid2ord.end() ) return x->second;
180 }
181 return -1;
182 }
183
GetOrdByOid(int oid) const184 inline int CSeqIds::GetOrdByOid( int oid ) const
185 {
186 TOid2Ord::const_iterator x = m_oid2ord.find( oid );
187 if( x == m_oid2ord.end() ) return -1;
188 return x->second;
189 }
190
191 END_OLIGOFAR_SCOPES
192
193 #endif
194