1 /*  $Id: criteria.cpp 401964 2013-06-04 15:01:28Z camacho $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Thomas W. Rackers
27  *
28  * File Description:
29  *   This file defines global functions which support the criteria function
30  *   classes.
31  *
32  */
33 
34 /// \author Thomas W. Rackers
35 
36 #include <ncbi_pch.hpp>
37 #include <objtools/blast/seqdb_writer/impl/criteria.hpp>
38 
39 using ncbi::CCriteriaSet;
40 
41 
42 BEGIN_NCBI_SCOPE
43 
44 
45 USING_SCOPE(objects);
46 
47 
48 #define MY_TRACING 0
49 
50 
51 /// Static (file-local) function
GetAvailableCriteria(void)52 static TCriteriaMap& GetAvailableCriteria(void)
53 {
54     // Constructor will be invoked only once.
55     static TCriteriaMap* s_Available_Criteria_ptr = new TCriteriaMap;
56 
57     // Remember if we've initialized the above map.
58     static bool map_empty = true;
59 
60     // There should be an entry for each of the subclasses of ICriteria
61     // defined in criteria.hpp.  The order is not important, here they're
62     // listed alphabetically.
63     // This is the one location where instances of the predefined subclasses
64     // of ICriteria are actually created.  This effectively makes them all
65     // singletons, although the Singleton Design Pattern is not enforced.
66     // Custom subclasses may contain state information, in which case they
67     // probably will NOT be used as singletons.
68     static ICriteria* allCriteria[] = {
69             new CCriteria_EST_HUMAN,
70             new CCriteria_EST_MOUSE,
71             new CCriteria_EST_OTHERS,
72             new CCriteria_PDB,
73             new CCriteria_REFSEQ,
74             new CCriteria_REFSEQ_GENOMIC,
75             new CCriteria_REFSEQ_RNA,
76             new CCriteria_SWISSPROT
77     };
78 
79     // If map is empty, we need to fill it in.  Should happen only once.
80     if (map_empty  &&  s_Available_Criteria_ptr->size() == 0) {
81         // Hey Mister Tally-man, tally me criteria....
82         const int numCriteria = sizeof allCriteria / sizeof allCriteria[0];
83 
84         // Create a temporary vector container over which we can iterate.
85         vector<ICriteria*> allCriteria_vec(
86                 allCriteria,
87                 allCriteria + numCriteria
88         );
89 
90         // Add each predefined criteria function to available set.
91         ITERATE(vector<ICriteria*>, critter, allCriteria_vec) {
92             ICriteria* crit = *critter;
93             (*s_Available_Criteria_ptr)[crit->GetLabel()] = crit;
94         }
95 
96         // Map is no longer empty.  (Quicker than calling size() each time.)
97         map_empty = false;
98     }
99 
100     return *s_Available_Criteria_ptr;
101 }
102 
103 
104 /// Constructor, creates empty container.
CCriteriaSet(void)105 CCriteriaSet::CCriteriaSet(void) {}
106 
107 
108 /// Destructor
~CCriteriaSet()109 /* virtual */ CCriteriaSet::~CCriteriaSet() {}
110 
111 
112 /// Factory method, retrieve pointer to existing instance of one of the
113 /// CCriteria_* subclasses.
114 ///
115 /// \param label of desired CCriteria_* class
116 /// \return pointer to CCriteria_* class instance or NULL if not found
GetCriteriaInstance(const string & label)117 /* static */ ICriteria* CCriteriaSet::GetCriteriaInstance(
118         const string& label
119 ) {
120     TCriteriaMap& critMap = GetAvailableCriteria();
121     TCriteriaMap::iterator it = critMap.find(label);
122     if (it == critMap.end()) {
123         return NULL;
124     } else {
125         return it->second;
126     }
127 }
128 
129 
130 /// Add a CCriteria_* class to the supported collection.  This method
131 /// supports adding custom criteria classes which do not appear in the
132 /// list of predefined criteria classes.
133 ///
134 /// \param pointer to criteria function instance
135 /// \return true if criteria function added to set
AddCriteria(ICriteria * critPtr)136 bool CCriteriaSet::AddCriteria(
137         ICriteria* critPtr
138 ) {
139     // If the map doesn't already include an entry with the supplied
140     // criteria class's label, a new entry will be created.
141     // If the map does have such an entry already, the map is not changed.
142     // (This is a characteristic of map::operator[].)
143     unsigned int numEntries = this->m_Crit_from_Label.size();
144     this->m_Crit_from_Label[critPtr->GetLabel()] = critPtr;
145 
146     // Return true if entry was unique and therefore added,
147     // false if entry duplicated existing entry.
148     return (this->m_Crit_from_Label.size() > numEntries);
149 }
150 
151 
152 /// Add a CCriteria_* class to the supported collection.  This method
153 /// only supports adding criteria classes which are already defined
154 /// in header file criteria.hpp, because they are looked up by label.
155 ///
156 /// \param label string on which set is searched for criteria function
157 /// \return true if criteria function added to set
AddCriteria(const string & label)158 bool CCriteriaSet::AddCriteria(
159         const string& label
160 ) {
161     TCriteriaMap& critMap = GetAvailableCriteria();
162     TCriteriaMap::iterator it = critMap.find(label);
163     if (it == critMap.end()) {
164         return false;
165     } else {
166         return AddCriteria(it->second);
167     }
168 }
169 
170 
171 /// Fetch a CCriteria_* class based on its label.
172 /// Returns NULL if a matching class is not found.
173 ///
174 /// \param label string on which set is searched for criteria function
175 /// \return pointer to criteria function
FindCriteria(const string & label)176 const ICriteria* CCriteriaSet::FindCriteria(
177         const string& label
178 ) {
179     TCriteriaMap::iterator it = this->m_Crit_from_Label.find(label);
180     if (it == this->m_Crit_from_Label.end()) {
181         return NULL;
182     } else {
183         return it->second;
184     }
185 }
186 
187 
188 /// Return the number of entries in the container.  Because a map
189 /// (not a multimap) is used, duplicate entries should not be counted.
190 ///
191 /// \return count of entries
GetCriteriaCount(void) const192 unsigned int CCriteriaSet::GetCriteriaCount(void) const {
193     return this->m_Crit_from_Label.size();
194 }
195 
196 
197 /// Return the actual container.  This will permit the caller to
198 /// iterate through the entries.
199 ///
200 /// \return internal container
GetCriteriaMap(void) const201 const TCriteriaMap& CCriteriaSet::GetCriteriaMap(void) const {
202     return this->m_Crit_from_Label;
203 }
204 
205 
CCriteriaSet_CalculateMemberships(const SDIRecord & direcord)206 CBlast_def_line::TMemberships CCriteriaSet_CalculateMemberships(
207         const SDIRecord& direcord
208 ) {
209     static CCriteriaSet* critSet_ptr = NULL;
210 
211     if (critSet_ptr == NULL) {
212         // First time through, create the default criteria set.
213         // Verify that all succeed.
214         critSet_ptr = new CCriteriaSet;
215         _VERIFY(critSet_ptr->AddCriteria("swissprot"));
216         _VERIFY(critSet_ptr->AddCriteria("pdb"));
217         _VERIFY(critSet_ptr->AddCriteria("refseq"));
218         _VERIFY(critSet_ptr->AddCriteria("refseq_rna"));
219         _VERIFY(critSet_ptr->AddCriteria("refseq_genomic"));
220     }
221 
222     // Need number of bits per mask word (i.e. an int).
223     static const int MASK_WORD_SIZE = sizeof (int) * 8;    // 8 bits/byte
224 
225     // Define initially empty membership bit list (list of ints).
226     CBlast_def_line::TMemberships bits_list;
227 
228     // Get the set of accepted criteria in container form
229     // to allow iteration through its contents.
230     const TCriteriaMap& critContainer = critSet_ptr->GetCriteriaMap();
231 
232     // Check the DI record against each criteria function in turn.
233     ITERATE(TCriteriaMap, critItem, critContainer) {
234 
235         // Get the criteria function.  (The container is actually a map,
236         // not a set, so each item is a std::pair<string,ICriteria*>.)
237         ICriteria* crit = critItem->second;
238 #if MY_TRACING
239         NcbiCout << "Checking for " << crit->GetLabel() << "... ";
240 #endif
241 
242         if (crit->is(&direcord)) {
243 
244             // Get assigned membership bit for this criteria function.
245             int membership_bit = crit->GetMembershipBit();
246 #if MY_TRACING
247             NcbiCout << "is a " << crit->GetLabel() << ", membership bit is "
248                 << membership_bit << NcbiEndl;
249 #endif
250 
251             // Verify it's not one of the two disallowed values.
252             if (membership_bit != ICriteria::eUNASSIGNED
253                 &&  membership_bit != ICriteria::eDO_NOT_USE) {
254 
255                 // Convert 1-indexed membership bit to 0-indexed offset.
256                 int bit_offset = membership_bit - 1;
257 
258                 // Create bit-mask word and calculate its offset in list.
259                 int bit_mask = 0x1 << (bit_offset % MASK_WORD_SIZE);
260                 int list_offset = bit_offset / MASK_WORD_SIZE;
261 
262                 // Is list long enough for calculated offset?
263                 int list_size = bits_list.size();
264                 if (list_size <= list_offset) {
265                     // No, append extra zeros if needed.
266                     while (list_size < list_offset) {
267                         bits_list.push_back(0);
268                         ++list_size;
269                     }
270                     // Now append the bit-mask word.
271                     bits_list.push_back(bit_mask);
272                 } else {
273                     // Yes, step through list, then bitwise-OR bitmask
274                     // into proper location in list.
275                     int cur_offset = 0;
276                     NON_CONST_ITERATE(
277                             CBlast_def_line::TMemberships,
278                             iter,
279                             bits_list
280                     ) {
281                         if (cur_offset == list_offset) {
282                             *iter |= bit_mask;
283                             break;  /* to HERE */
284                         }
285                         ++cur_offset;
286                     }
287                     /* HERE */
288                 }
289 
290             }
291 
292 #if MY_TRACING
293         } else {
294 
295             NcbiCout << "is not a " << crit->GetLabel() << NcbiEndl;
296 
297 #endif
298         }
299 
300     }
301 
302     // Our work here is finished.
303     return bits_list;
304 }
305 
306 int
CCriteriaSet_CalculateMemberships(const SDIRecord & direcord,objects::CBlast_def_line & defline)307 CCriteriaSet_CalculateMemberships(const SDIRecord& direcord,
308                                   objects::CBlast_def_line& defline)
309 {
310     int retval = 0;
311     try {
312         CBlast_def_line::TMemberships list(CCriteriaSet_CalculateMemberships(direcord));
313         defline.SetMemberships().swap(list);
314     } catch (...) {
315         retval = 1;
316     }
317     return retval;
318 }
319 
320 END_NCBI_SCOPE
321