1 #ifndef OLIGOFAR_CSEQIDS__HPP
2 #define OLIGOFAR_CSEQIDS__HPP
3 
4 #include "defs.hpp"
5 #include <objects/seqloc/Seq_id.hpp>
6 #include <deque>
7 #include <map>
8 #include <set>
9 
10 BEGIN_OLIGOFAR_SCOPES
11 
12 class CSeqIds
13 {
14 public:
15 	typedef objects::CBioseq::TId TIds;
16 	typedef objects::CSeq_id TSeqId;
17 
18 	class CSeqDef
19 	{
20 	public:
21 		typedef set<string> TIdSet;
CSeqDef(int oid=-1)22 		CSeqDef( int oid = -1 ) : m_oid( oid ) {}
CSeqDef(int oid,const TIds & ids)23 		CSeqDef( int oid, const TIds& ids ) : m_oid( oid ) { AddIds( ids ); }
GetIdStrings() const24 		const TIdSet& GetIdStrings() const { return m_ids; }
25 		string GetBestIdString() const;
26 		string GetFullIdString() const;
HasId(const TSeqId & seqId) const27 		bool HasId( const TSeqId& seqId ) const { return HasId( seqId.AsFastaString() ); }
HasId(const string & seqId) const28 		bool HasId( const string&  seqId ) const { return m_ids.find( seqId ) != m_ids.end(); }
AddId(const TSeqId & seqId)29 		void AddId( const TSeqId& seqId ) { AddId( seqId.AsFastaString() ); }
AddId(const string & seqId)30 		void AddId( const string&  seqId ) { m_ids.insert( seqId ); }
31 		void AddIds( const string& seqIds );
32 		void AddIds( const TIds& ids );
33 		bool HasAnyId( const TIds& ids ) const;
34 		bool HasAnyId( const string& ids ) const;
HasOid() const35 		bool HasOid() const { return m_oid >= 0; }
36 		void SetOid( int oid );
37 		int  GetOid() const;
38 	protected:
39 		bool m_oidSet;
40 		int m_oid;
41 		TIdSet m_ids;
42 	};
43 
44 	typedef map<string,int> TSeqId2Ord;
45 	typedef deque<CSeqDef> TOrd2SeqIds;
46 	typedef map<int,int> TOid2Ord;
47 
CSeqIds()48 	CSeqIds() : m_lastOrd( -1 ) {}
49 
Clear()50 	void Clear() { m_oid2ord.clear(); m_seqid2ord.clear(); m_ord2seqids.clear(); m_lastOrd = -1; }
IsEmpty() const51 	bool IsEmpty() const { return m_ord2seqids.empty(); }
52 	int  Register( const TIds& ids, int oid = -1 );
53 	int  Register( const string& ids, int oid = -1 );
Register(int oid)54 	int  Register( int oid ) { TIds ids; return Register( ids, oid ); }
55 	const CSeqDef& GetSeqDef( int ord ) const;
56 
57 	int GetOrdById( const TIds& ids ) const;
58 	int GetOrdByOid( int oid ) const;
59 
60 	// following function is used to avoid calling GetOrdById after Register in independend modules
GetLastOrd() const61 	int GetLastOrd() const { ASSERT( m_lastOrd != -1 ); return m_lastOrd; }
62 protected:
63 	TOrd2SeqIds m_ord2seqids;
64 	TSeqId2Ord  m_seqid2ord;
65 	TOid2Ord    m_oid2ord;
66 
67 	int m_lastOrd;
68 };
69 
GetBestIdString() const70 inline string CSeqIds::CSeqDef::GetBestIdString() const
71 {
72 	if( m_ids.size() ) return *m_ids.begin();
73 	else return "UNKNOWN";
74 }
75 
GetFullIdString() const76 inline string CSeqIds::CSeqDef::GetFullIdString() const
77 {
78 	if( m_ids.size() ) {
79 		string out;
80 		ITERATE( TIdSet, id, m_ids ) {
81 			if( id != m_ids.begin() ) out += "|";
82 			out += *id;
83 		}
84         return out;
85 	} else return "UNKNOWN";
86 }
87 
AddIds(const string & seqIds)88 inline void CSeqIds::CSeqDef::AddIds( const string& seqIds )
89 {
90 	TIds ids;
91 	objects::CSeq_id::ParseFastaIds( ids, seqIds );
92 	AddIds( ids );
93 }
94 
AddIds(const TIds & ids)95 inline void CSeqIds::CSeqDef::AddIds( const TIds& ids )
96 {
97 	ITERATE( TIds, id, ids ) AddId( **id );
98 }
99 
HasAnyId(const TIds & ids) const100 inline bool CSeqIds::CSeqDef::HasAnyId( const TIds& ids ) const
101 {
102 	ITERATE( TIds, id, ids ) if( HasId( **id ) ) return true;
103 	return false;
104 }
105 
HasAnyId(const string & seqIds) const106 inline bool CSeqIds::CSeqDef::HasAnyId( const string& seqIds ) const
107 {
108 	TIds ids;
109 	objects::CSeq_id::ParseFastaIds( ids, seqIds );
110 	return HasAnyId( ids );
111 }
112 
SetOid(int oid)113 inline void CSeqIds::CSeqDef::SetOid( int oid )
114 {
115 	if( oid < 0 )
116 		THROW( logic_error, "Attempt to set invalid oid " << oid << " for " << GetFullIdString() );
117 	if( HasOid() && oid != m_oid )
118 		THROW( logic_error, "Can't redefine oid for " << GetFullIdString() << " from " << m_oid << " to " << oid );
119 	m_oid = oid;
120 }
121 
GetOid() const122 inline int CSeqIds::CSeqDef::GetOid() const
123 {
124 	if( !HasOid() )
125 		THROW( logic_error, "Attempt to get oid when it is not set for " << GetFullIdString() );
126 	return m_oid;
127 }
128 
Register(const TIds & ids,int oid)129 inline int CSeqIds::Register( const TIds& ids, int oid )
130 {
131 	ITERATE( TIds, i, ids ) {
132 		string id( (*i)->AsFastaString() );
133 		TSeqId2Ord::iterator x = m_seqid2ord.find( id );
134 		if( x != m_seqid2ord.end() ) {
135 			int ord = x->second;
136 			m_ord2seqids[ord].AddIds( ids );
137 			if( oid >= 0 ) {
138 				m_ord2seqids[ord].SetOid( oid );
139 				m_oid2ord.insert( make_pair( oid, ord ) );
140 			}
141 			return m_lastOrd = ord;
142 		}
143 	}
144 	if( oid >= 0 ) {
145 		TOid2Ord::iterator i = m_oid2ord.find( oid );
146 		if( i != m_oid2ord.end() ) {
147 			int ord = i->second;
148 			m_ord2seqids[ord].AddIds( ids );
149 			return m_lastOrd = ord;
150 		}
151 	}
152 	ASSERT( ids.size() || oid >= 0 );
153 	int ord = m_ord2seqids.size();
154 	m_ord2seqids.push_back( CSeqDef( oid, ids ) );
155 	ITERATE( TIds, i, ids )
156 		m_seqid2ord.insert( make_pair( (*i)->AsFastaString(), ord ) );
157 	if( oid >= 0 ) m_oid2ord.insert( make_pair( oid, ord ) );
158 	return m_lastOrd = ord;
159 }
160 
Register(const string & idstr,int oid)161 inline int CSeqIds::Register( const string& idstr, int oid )
162 {
163 	TIds ids;
164 	objects::CSeq_id::ParseFastaIds( ids, idstr );
165 	return Register( ids, oid );
166 }
167 
GetSeqDef(int ord) const168 inline const CSeqIds::CSeqDef& CSeqIds::GetSeqDef( int ord ) const
169 {
170 	ASSERT( ord >= 0 && ord < (int)m_ord2seqids.size() );
171 	return m_ord2seqids[ord];
172 }
173 
GetOrdById(const TIds & ids) const174 inline int CSeqIds::GetOrdById( const TIds& ids ) const
175 {
176 	ITERATE( TIds, i, ids ) {
177 		string id( (*i)->AsFastaString() );
178 		TSeqId2Ord::const_iterator x = m_seqid2ord.find( id );
179 		if( x != m_seqid2ord.end() ) return x->second;
180 	}
181 	return -1;
182 }
183 
GetOrdByOid(int oid) const184 inline int CSeqIds::GetOrdByOid( int oid ) const
185 {
186 	TOid2Ord::const_iterator x = m_oid2ord.find( oid );
187 	if( x == m_oid2ord.end() ) return -1;
188 	return x->second;
189 }
190 
191 END_OLIGOFAR_SCOPES
192 
193 #endif
194