1 /* $Id: criteria.cpp 401964 2013-06-04 15:01:28Z camacho $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Thomas W. Rackers
27 *
28 * File Description:
29 * This file defines global functions which support the criteria function
30 * classes.
31 *
32 */
33
34 /// \author Thomas W. Rackers
35
36 #include <ncbi_pch.hpp>
37 #include <objtools/blast/seqdb_writer/impl/criteria.hpp>
38
39 using ncbi::CCriteriaSet;
40
41
42 BEGIN_NCBI_SCOPE
43
44
45 USING_SCOPE(objects);
46
47
48 #define MY_TRACING 0
49
50
51 /// Static (file-local) function
GetAvailableCriteria(void)52 static TCriteriaMap& GetAvailableCriteria(void)
53 {
54 // Constructor will be invoked only once.
55 static TCriteriaMap* s_Available_Criteria_ptr = new TCriteriaMap;
56
57 // Remember if we've initialized the above map.
58 static bool map_empty = true;
59
60 // There should be an entry for each of the subclasses of ICriteria
61 // defined in criteria.hpp. The order is not important, here they're
62 // listed alphabetically.
63 // This is the one location where instances of the predefined subclasses
64 // of ICriteria are actually created. This effectively makes them all
65 // singletons, although the Singleton Design Pattern is not enforced.
66 // Custom subclasses may contain state information, in which case they
67 // probably will NOT be used as singletons.
68 static ICriteria* allCriteria[] = {
69 new CCriteria_EST_HUMAN,
70 new CCriteria_EST_MOUSE,
71 new CCriteria_EST_OTHERS,
72 new CCriteria_PDB,
73 new CCriteria_REFSEQ,
74 new CCriteria_REFSEQ_GENOMIC,
75 new CCriteria_REFSEQ_RNA,
76 new CCriteria_SWISSPROT
77 };
78
79 // If map is empty, we need to fill it in. Should happen only once.
80 if (map_empty && s_Available_Criteria_ptr->size() == 0) {
81 // Hey Mister Tally-man, tally me criteria....
82 const int numCriteria = sizeof allCriteria / sizeof allCriteria[0];
83
84 // Create a temporary vector container over which we can iterate.
85 vector<ICriteria*> allCriteria_vec(
86 allCriteria,
87 allCriteria + numCriteria
88 );
89
90 // Add each predefined criteria function to available set.
91 ITERATE(vector<ICriteria*>, critter, allCriteria_vec) {
92 ICriteria* crit = *critter;
93 (*s_Available_Criteria_ptr)[crit->GetLabel()] = crit;
94 }
95
96 // Map is no longer empty. (Quicker than calling size() each time.)
97 map_empty = false;
98 }
99
100 return *s_Available_Criteria_ptr;
101 }
102
103
104 /// Constructor, creates empty container.
CCriteriaSet(void)105 CCriteriaSet::CCriteriaSet(void) {}
106
107
108 /// Destructor
~CCriteriaSet()109 /* virtual */ CCriteriaSet::~CCriteriaSet() {}
110
111
112 /// Factory method, retrieve pointer to existing instance of one of the
113 /// CCriteria_* subclasses.
114 ///
115 /// \param label of desired CCriteria_* class
116 /// \return pointer to CCriteria_* class instance or NULL if not found
GetCriteriaInstance(const string & label)117 /* static */ ICriteria* CCriteriaSet::GetCriteriaInstance(
118 const string& label
119 ) {
120 TCriteriaMap& critMap = GetAvailableCriteria();
121 TCriteriaMap::iterator it = critMap.find(label);
122 if (it == critMap.end()) {
123 return NULL;
124 } else {
125 return it->second;
126 }
127 }
128
129
130 /// Add a CCriteria_* class to the supported collection. This method
131 /// supports adding custom criteria classes which do not appear in the
132 /// list of predefined criteria classes.
133 ///
134 /// \param pointer to criteria function instance
135 /// \return true if criteria function added to set
AddCriteria(ICriteria * critPtr)136 bool CCriteriaSet::AddCriteria(
137 ICriteria* critPtr
138 ) {
139 // If the map doesn't already include an entry with the supplied
140 // criteria class's label, a new entry will be created.
141 // If the map does have such an entry already, the map is not changed.
142 // (This is a characteristic of map::operator[].)
143 unsigned int numEntries = this->m_Crit_from_Label.size();
144 this->m_Crit_from_Label[critPtr->GetLabel()] = critPtr;
145
146 // Return true if entry was unique and therefore added,
147 // false if entry duplicated existing entry.
148 return (this->m_Crit_from_Label.size() > numEntries);
149 }
150
151
152 /// Add a CCriteria_* class to the supported collection. This method
153 /// only supports adding criteria classes which are already defined
154 /// in header file criteria.hpp, because they are looked up by label.
155 ///
156 /// \param label string on which set is searched for criteria function
157 /// \return true if criteria function added to set
AddCriteria(const string & label)158 bool CCriteriaSet::AddCriteria(
159 const string& label
160 ) {
161 TCriteriaMap& critMap = GetAvailableCriteria();
162 TCriteriaMap::iterator it = critMap.find(label);
163 if (it == critMap.end()) {
164 return false;
165 } else {
166 return AddCriteria(it->second);
167 }
168 }
169
170
171 /// Fetch a CCriteria_* class based on its label.
172 /// Returns NULL if a matching class is not found.
173 ///
174 /// \param label string on which set is searched for criteria function
175 /// \return pointer to criteria function
FindCriteria(const string & label)176 const ICriteria* CCriteriaSet::FindCriteria(
177 const string& label
178 ) {
179 TCriteriaMap::iterator it = this->m_Crit_from_Label.find(label);
180 if (it == this->m_Crit_from_Label.end()) {
181 return NULL;
182 } else {
183 return it->second;
184 }
185 }
186
187
188 /// Return the number of entries in the container. Because a map
189 /// (not a multimap) is used, duplicate entries should not be counted.
190 ///
191 /// \return count of entries
GetCriteriaCount(void) const192 unsigned int CCriteriaSet::GetCriteriaCount(void) const {
193 return this->m_Crit_from_Label.size();
194 }
195
196
197 /// Return the actual container. This will permit the caller to
198 /// iterate through the entries.
199 ///
200 /// \return internal container
GetCriteriaMap(void) const201 const TCriteriaMap& CCriteriaSet::GetCriteriaMap(void) const {
202 return this->m_Crit_from_Label;
203 }
204
205
CCriteriaSet_CalculateMemberships(const SDIRecord & direcord)206 CBlast_def_line::TMemberships CCriteriaSet_CalculateMemberships(
207 const SDIRecord& direcord
208 ) {
209 static CCriteriaSet* critSet_ptr = NULL;
210
211 if (critSet_ptr == NULL) {
212 // First time through, create the default criteria set.
213 // Verify that all succeed.
214 critSet_ptr = new CCriteriaSet;
215 _VERIFY(critSet_ptr->AddCriteria("swissprot"));
216 _VERIFY(critSet_ptr->AddCriteria("pdb"));
217 _VERIFY(critSet_ptr->AddCriteria("refseq"));
218 _VERIFY(critSet_ptr->AddCriteria("refseq_rna"));
219 _VERIFY(critSet_ptr->AddCriteria("refseq_genomic"));
220 }
221
222 // Need number of bits per mask word (i.e. an int).
223 static const int MASK_WORD_SIZE = sizeof (int) * 8; // 8 bits/byte
224
225 // Define initially empty membership bit list (list of ints).
226 CBlast_def_line::TMemberships bits_list;
227
228 // Get the set of accepted criteria in container form
229 // to allow iteration through its contents.
230 const TCriteriaMap& critContainer = critSet_ptr->GetCriteriaMap();
231
232 // Check the DI record against each criteria function in turn.
233 ITERATE(TCriteriaMap, critItem, critContainer) {
234
235 // Get the criteria function. (The container is actually a map,
236 // not a set, so each item is a std::pair<string,ICriteria*>.)
237 ICriteria* crit = critItem->second;
238 #if MY_TRACING
239 NcbiCout << "Checking for " << crit->GetLabel() << "... ";
240 #endif
241
242 if (crit->is(&direcord)) {
243
244 // Get assigned membership bit for this criteria function.
245 int membership_bit = crit->GetMembershipBit();
246 #if MY_TRACING
247 NcbiCout << "is a " << crit->GetLabel() << ", membership bit is "
248 << membership_bit << NcbiEndl;
249 #endif
250
251 // Verify it's not one of the two disallowed values.
252 if (membership_bit != ICriteria::eUNASSIGNED
253 && membership_bit != ICriteria::eDO_NOT_USE) {
254
255 // Convert 1-indexed membership bit to 0-indexed offset.
256 int bit_offset = membership_bit - 1;
257
258 // Create bit-mask word and calculate its offset in list.
259 int bit_mask = 0x1 << (bit_offset % MASK_WORD_SIZE);
260 int list_offset = bit_offset / MASK_WORD_SIZE;
261
262 // Is list long enough for calculated offset?
263 int list_size = bits_list.size();
264 if (list_size <= list_offset) {
265 // No, append extra zeros if needed.
266 while (list_size < list_offset) {
267 bits_list.push_back(0);
268 ++list_size;
269 }
270 // Now append the bit-mask word.
271 bits_list.push_back(bit_mask);
272 } else {
273 // Yes, step through list, then bitwise-OR bitmask
274 // into proper location in list.
275 int cur_offset = 0;
276 NON_CONST_ITERATE(
277 CBlast_def_line::TMemberships,
278 iter,
279 bits_list
280 ) {
281 if (cur_offset == list_offset) {
282 *iter |= bit_mask;
283 break; /* to HERE */
284 }
285 ++cur_offset;
286 }
287 /* HERE */
288 }
289
290 }
291
292 #if MY_TRACING
293 } else {
294
295 NcbiCout << "is not a " << crit->GetLabel() << NcbiEndl;
296
297 #endif
298 }
299
300 }
301
302 // Our work here is finished.
303 return bits_list;
304 }
305
306 int
CCriteriaSet_CalculateMemberships(const SDIRecord & direcord,objects::CBlast_def_line & defline)307 CCriteriaSet_CalculateMemberships(const SDIRecord& direcord,
308 objects::CBlast_def_line& defline)
309 {
310 int retval = 0;
311 try {
312 CBlast_def_line::TMemberships list(CCriteriaSet_CalculateMemberships(direcord));
313 defline.SetMemberships().swap(list);
314 } catch (...) {
315 retval = 1;
316 }
317 return retval;
318 }
319
320 END_NCBI_SCOPE
321