1 /* $Id: seqdbisam.cpp 631527 2021-05-19 13:49:38Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kevin Bealer
27 *
28 */
29 //IRENA: ALL locking removed from this file CSeqDBLockHold & stays in some functions but is not used
30 /// @file seqdbisam.cpp
31 /// Implementation for the CSeqDBIsam class, which manages an ISAM
32 /// index of some particular kind of identifiers.
33 #include <ncbi_pch.hpp>
34 #include <objtools/blast/seqdb_reader/impl/seqdbisam.hpp>
35 #include <objects/seqloc/Seq_id.hpp>
36 #include <objects/general/general__.hpp>
37 #include <corelib/ncbiutil.hpp>
38
39 /// Place these definitions in the ncbi namespace
40 BEGIN_NCBI_SCOPE
41
42 /// Import this namespace
43 USING_SCOPE(objects);
44
/// On-disk ISAM format version; x_InitSearch() rejects any other value.
#define ISAM_VERSION 1

/// Page size given to numeric indices by the constructor before the
/// index header has been read.
#define DEFAULT_NISAM_SIZE 256

/// Page size given to string indices by the constructor before the
/// index header has been read.
#define DEFAULT_SISAM_SIZE 64

/// Page size value that marks a memory-only string index.
#define MEMORY_ONLY_PAGE_SIZE 1
56
57
58 CSeqDBIsam::EErrorCode
x_InitSearch(void)59 CSeqDBIsam::x_InitSearch(void)
60 {
61 if(m_Initialized == true)
62 return eNoError;
63
64 TIndx info_needed = 10 * sizeof(Int4);
65
66 bool found_index_file =
67 m_Atlas.GetFileSizeL(m_IndexFname, m_IndexFileLength);
68
69 if ((! found_index_file) || (m_IndexFileLength < info_needed)) {
70 return eWrongFile;
71 }
72
73
74 Int4 * FileInfo = (Int4*)m_IndexLease.GetFileDataPtr(m_IndexFname,0);
75
76 // Check for consistence of files and parameters
77
78 Int4 Version = SeqDB_GetStdOrd(& FileInfo[0]);
79
80 if (Version != ISAM_VERSION)
81 return eBadVersion;
82
83 Int4 IsamType = SeqDB_GetStdOrd(& FileInfo[1]);
84
85 if (IsamType == eNumericLongId && m_Type == eNumeric) {
86 m_LongId = true;
87 m_TermSize = 12;
88 IsamType = eNumeric;
89 }
90
91 if (IsamType != m_Type)
92 return eBadType;
93
94 m_NumTerms = SeqDB_GetStdOrd(& FileInfo[3]);
95 m_NumSamples = SeqDB_GetStdOrd(& FileInfo[4]);
96 m_PageSize = SeqDB_GetStdOrd(& FileInfo[5]);
97 m_MaxLineSize = SeqDB_GetStdOrd(& FileInfo[6]);
98
99 if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) {
100 // Special case of memory-only index
101 m_DataFileLength = SeqDB_GetStdOrd(& FileInfo[2]);
102
103 TIndx disk_file_length(0);
104 bool found_data_file =
105 m_Atlas.GetFileSizeL(m_DataFname, disk_file_length);
106
107 if ((! found_data_file) || (m_DataFileLength != disk_file_length)) {
108 return eWrongFile;
109 }
110 }
111
112 // This space reserved for future use
113
114 m_IdxOption = SeqDB_GetStdOrd(& FileInfo[7]);
115
116 m_KeySampleOffset = (9 * sizeof(Int4));
117
118 m_Initialized = true;
119
120 return eNoError;
121 }
122
x_GetPageNumElements(Int4 sample_num,Int4 * start)123 Int4 CSeqDBIsam::x_GetPageNumElements(Int4 sample_num,
124 Int4 * start)
125 {
126 Int4 num_elements(0);
127
128 *start = sample_num * m_PageSize;
129
130 if (sample_num + 1 == m_NumSamples) {
131 num_elements = m_NumTerms - *start;
132 } else {
133 num_elements = m_PageSize;
134 }
135
136 return num_elements;
137 }
138
139 CSeqDBIsam::EErrorCode
x_SearchIndexNumeric(Int8 Number,int * Data,Uint4 * Index,Int4 & SampleNum,bool & done)140 CSeqDBIsam::x_SearchIndexNumeric(Int8 Number,
141 int * Data,
142 Uint4 * Index,
143 Int4 & SampleNum,
144 bool & done)
145
146 {
147 if(m_Initialized == false) {
148 done = true;
149 // Return just any error
150 return eInitFailed;
151 }
152
153 if (x_OutOfBounds(Number)) {
154 done = true;
155 return eNotFound;
156 }
157
158 _ASSERT(m_Type != eNumericNoData);
159
160 // Search the sample file.
161
162 Int4 Start (0);
163 Int4 Stop (m_NumSamples - 1);
164
165 while(Stop >= Start) {
166 SampleNum = ((Uint4)(Stop + Start)) >> 1;
167
168 TIndx offset_begin = m_KeySampleOffset + (m_TermSize * SampleNum);
169 //TIndx offset_end = offset_begin + m_TermSize;
170
171 const void* keydatap(0);
172
173 Int8 Key(0);
174
175 keydatap = m_IndexLease.GetFileDataPtr(m_IndexFname,offset_begin);
176 Key = x_GetNumericKey (keydatap);
177
178 // If this is an exact match, return the master term number.
179
180 if (Key == Number) {
181 if (Data != NULL) {
182 *Data = x_GetNumericData(keydatap);
183 }
184
185 if (Index != NULL)
186 *Index = SampleNum * m_PageSize;
187
188 done = true;
189 return eNoError;
190 }
191
192 // Otherwise, search for the next sample.
193
194 if ( Number < Key )
195 Stop = --SampleNum;
196 else
197 Start = SampleNum +1;
198 }
199
200 // If the term is out of range altogether, report not finding it.
201
202 if ( (SampleNum < 0) || (SampleNum >= m_NumSamples)) {
203
204 if (Data != NULL)
205 *Data = eNotFound;
206
207 if(Index != NULL)
208 *Index = eNotFound;
209
210 done = true;
211 return eNotFound;
212 }
213
214 done = false;
215 return eNoError;
216 }
217
218 void
x_SearchNegativeMulti(int vol_start,int vol_end,CSeqDBNegativeList & ids,bool use_tis)219 CSeqDBIsam::x_SearchNegativeMulti(int vol_start,
220 int vol_end,
221 CSeqDBNegativeList & ids,
222 bool use_tis)
223
224 {
225 if(m_Initialized == false) {
226 NCBI_THROW(CSeqDBException,
227 eArgErr,
228 "Error: Unable to use ISAM index in batch mode.");
229 }
230
231 //m_Atlas.Lock(locked);
232
233
234 // We can use Parabolic Binary Search for the negative GI list but
235 // not for the ISAM file data, because in the negative ID list
236 // case, every line of the ISAM data must be looked at.
237
238 _ASSERT(m_Type != eNumericNoData);
239
240 //......................................................................
241 //
242 // Translate the entire Gi List.
243 //
244 //......................................................................
245
246 int gilist_size = use_tis ? ids.GetNumTis() : ids.GetNumGis();
247
248 int gilist_index = 0;
249
250 int sample_index(0);
251 const void * data_page (0);
252
253 while(sample_index < m_NumSamples) {
254 int start = 0, num_elements = 0;
255
256 x_MapDataPage(sample_index,
257 start,
258 num_elements,
259 & data_page);
260
261 for(int i = 0; i < num_elements; i++) {
262 Int8 isam_key(0);
263 int isam_data(0);
264
265 // 1. Get the ID+OID from the data page.
266
267 x_GetDataElement(data_page,
268 i,
269 isam_key,
270 isam_data);
271
272 // 2. Look for it in the negative id list.
273
274 bool found = false;
275
276 if (gilist_index < gilist_size) {
277 found = x_FindInNegativeList(ids,
278 gilist_index,
279 isam_key,
280 use_tis);
281 }
282
283 // 3. If not found, add the OID to the negative ID list.
284
285 if (isam_data < vol_end) {
286 if (found) {
287 // OID is found, but may not be included yet.
288 ids.AddVisibleOid(isam_data + vol_start);
289 } else {
290 // OID is included for iteration.
291 ids.AddIncludedOid(isam_data + vol_start);
292 }
293 }
294 }
295
296 // Move to next data page. Note that for a negative ID list
297 // processing, we don't actually fetch any samples, because
298 // every ID->OID line needs to be examined anyway.
299
300 sample_index ++;
301 }
302 }
303
304 //In case if acc2 does not have version and acc1 has version
305 //check if it is the same accession
s_IsSameAccession(string acc1,string acc2)306 static bool s_IsSameAccession(string acc1, string acc2)
307 {
308 bool sameAccession = false;
309 if(NStr::Find(acc2,".") == NPOS) { // no version in acc2
310 if(NStr::Find(acc1,".") != NPOS && NStr::Find(acc1, acc2) != NPOS) {
311 string accession, version;
312 NStr::SplitInTwo(acc1,".", accession, version);
313 if(acc2 == accession) {
314 sameAccession = true;
315 }
316 }
317 }
318 return sameAccession;
319 }
320
321 //In case if keys[currIndex] does not have version and keys[currIndex + 1] has version
322 //check if it is the same accession
s_IsSameAccession(vector<string> keys,int num_keys,int currIndex)323 static bool s_IsSameAccession(vector <string> keys, int num_keys, int currIndex)
324 {
325 bool sameAccession = false;
326 if(currIndex < num_keys - 1) {
327 sameAccession = s_IsSameAccession(keys[currIndex + 1], keys[currIndex]);
328 }
329 return sameAccession;
330 }
331
332 void
x_SearchNegativeMultiSeq(int vol_start,int vol_end,CSeqDBNegativeList & ids)333 CSeqDBIsam::x_SearchNegativeMultiSeq(int vol_start,
334 int vol_end,
335 CSeqDBNegativeList & ids)
336
337 {
338 int gilist_size = ids.ListSize();
339 if (! gilist_size) return;
340
341 if(m_Initialized == false) {
342 // Most ordinary errors (missing GIs for example) are
343 // ignored for "multi" mode searches. But if a GI list is
344 // specified, and cannot be interpreted, it is an error.
345
346 NCBI_THROW(CSeqDBException,
347 eArgErr,
348 "Error: Unable to use ISAM index in batch mode.");
349 }
350
351
352 vector<string> sample_keys;
353 vector<TIndx> page_offs;
354 vector<string> keys;
355 vector<int> vals;
356
357 sample_keys.reserve(m_NumSamples);
358 page_offs.reserve(m_NumSamples + 1);
359 keys.reserve(m_PageSize);
360 vals.reserve(m_PageSize);
361
362
363 x_LoadIndex(m_IndexLease, sample_keys, page_offs);
364
365 int gilist_index = 0;
366 int sample_index = 0;
367
368 while(sample_index < m_NumSamples) {
369
370 // Now we should be ready to search a data block.
371 keys.clear();
372 vals.clear();
373
374 int num_keys = m_PageSize;
375 if (sample_index + 1 == m_NumSamples) {
376 num_keys = m_NumTerms - sample_index * m_PageSize;
377 }
378
379 x_LoadData(m_DataLease, keys, vals, num_keys, page_offs[sample_index]);
380
381 for(int i = 0; i < num_keys; i++) {
382 // 2. Look for it in the negative id list.
383
384 bool found = false;
385 if (gilist_index < gilist_size) {
386 found = x_FindInNegativeList(ids,
387 gilist_index,
388 keys[i]);
389
390 }
391 if (vals[i] < vol_end) {
392 if (found) {
393 // OID is found, but may not be included yet.
394 ids.AddVisibleOid(vals[i] + vol_start);
395 //If next accession is the same as current, but with version
396 if(s_IsSameAccession(keys, num_keys, i)) {
397 i++; //skip next check - sequence already excluded
398 }
399 } else {
400 // OID is included for iteration.
401 //Only include next accession if it is not the same as current (but with version)
402 //because it may be in exclude list. The check will be done in the next step
403 if(!s_IsSameAccession(keys, num_keys, i)) {
404 ids.AddIncludedOid(vals[i] + vol_start);
405 }
406 }
407 }
408
409 }
410 // Move to next data page. Note that for a negative ID list
411 // processing, we don't actually fetch any samples, because
412 // every ID->OID line needs to be examined anyway.
413
414 sample_index ++;
415
416 }
417 }
418
419
420 CSeqDBIsam::EErrorCode
x_SearchDataNumeric(Int8 Number,int * Data,Uint4 * Index,Int4 SampleNum)421 CSeqDBIsam::x_SearchDataNumeric(Int8 Number,
422 int * Data,
423 Uint4 * Index,
424 Int4 SampleNum)
425
426 {
427 // Load the appropriate page of numbers into memory.
428 _ASSERT(m_Type != eNumericNoData);
429
430 Int4 Start(0);
431 Int4 NumElements = x_GetPageNumElements(SampleNum, & Start);
432
433 Int4 first = Start;
434 Int4 last = Start + NumElements - 1;
435
436 const void * KeyDataPage = NULL;
437 const void * KeyDataPageStart = NULL;
438
439 TIndx offset_begin = Start * m_TermSize;
440 //TIndx offset_end = offset_begin + m_TermSize * NumElements;
441
442 KeyDataPageStart = m_DataLease.GetFileDataPtr(m_DataFname,offset_begin);
443
444
445 KeyDataPage = (char *)KeyDataPageStart - Start * m_TermSize;
446
447 bool found (false);
448 Int4 current (0);
449
450 // Search the page for the number.
451 while (first <= last) {
452 current = (first+last)/2;
453
454 Int8 Key = x_GetNumericKey((char *)KeyDataPage + current * m_TermSize);
455
456 if (Key > Number) {
457 last = --current;
458 } else if (Key < Number) {
459 first = ++current;
460 } else {
461 found = true;
462 break;
463 }
464 }
465
466 if (found == false) {
467 if (Data != NULL)
468 *Data = eNotFound;
469
470 if(Index != NULL)
471 *Index = eNotFound;
472
473 return eNotFound;
474 }
475
476 if (Data != NULL) {
477 *Data = x_GetNumericData((char *)KeyDataPage + current * m_TermSize);
478 }
479
480 if(Index != NULL)
481 *Index = Start + current;
482
483 return eNoError;
484 }
485
486
487 // ------------------------NumericSearch--------------------------
488 // Purpose: Main search function of Numeric ISAM
489 //
490 // Parameters: Key - interer to search
491 // Data - returned value (for NIASM with data)
492 // Index - internal index in database
493 // Returns: ISAM Error Code
494 // NOTE: None
495 // ----------------------------------------------------------------
496
497 CSeqDBIsam::EErrorCode
x_NumericSearch(Int8 Number,int * Data,Uint4 * Index)498 CSeqDBIsam::x_NumericSearch(Int8 Number,
499 int * Data,
500 Uint4 * Index)
501
502 {
503 bool done (false);
504 Int4 SampleNum (0);
505
506 EErrorCode error =
507 x_SearchIndexNumeric(Number, Data, Index, SampleNum, done);
508
509 if (! done) {
510 error = x_SearchDataNumeric(Number, Data, Index, SampleNum);
511 }
512
513 return error;
514 }
515
x_DiffCharLease(const string & term_in,CSeqDBFileMemMap & lease,const string & file_name,TIndx file_length,Uint4 at_least,TIndx KeyOffset,bool ignore_case)516 int CSeqDBIsam::x_DiffCharLease(const string & term_in,
517 CSeqDBFileMemMap & lease,
518 const string & file_name,
519 TIndx file_length,
520 Uint4 at_least,
521 TIndx KeyOffset,
522 bool ignore_case)
523
524 {
525 int result(-1);
526
527 //m_Atlas.Lock(locked);
528
529 // Add one to term_end to insure we don't consider "AA" and "AAB"
530 // as equal.
531
532 TIndx offset_begin = KeyOffset;
533 TIndx term_end = KeyOffset + term_in.size() + 1;
534 TIndx map_end = term_end + at_least;
535
536 if (map_end > file_length) {
537 map_end = file_length;
538
539 if (term_end > map_end) {
540 term_end = map_end;
541 result = int(file_length - offset_begin);
542 }
543 }
544
545 const char * file_data = (const char *)lease.GetFileDataPtr(file_name,offset_begin);
546
547 Int4 dc_result =
548 x_DiffChar(term_in,
549 file_data,
550 file_data + term_in.size() + 1,
551 ignore_case);
552
553 if (dc_result != -1) {
554 return dc_result;
555 }
556
557 return result;
558 }
559
560 /// Return NUL for nulls or EOL characters
561 ///
562 /// This function returns a NUL byte for any of NUL, CR, or NL. This
563 /// is done because these characters are used to terminate the
564 /// variable length records in a string-based ISAM file.
565 ///
566 /// @param c
567 /// A character
568 /// @return
569 /// NUL or the same character
570 static inline char
s_SeqDBIsam_NullifyEOLs(char c)571 s_SeqDBIsam_NullifyEOLs(char c)
572 {
573 if (SEQDB_ISEOL(c)) {
574 return 0;
575 } else {
576 return c;
577 }
578 }
579
/// Character (value 2) that ends a string ISAM key when data follows it.
const char ISAM_DATA_CHAR = static_cast<char>(2);
582
583 /// Returns true if the character is a terminator for an ISAM key.
ENDS_ISAM_KEY(char P)584 static inline bool ENDS_ISAM_KEY(char P)
585 {
586 return (P == ISAM_DATA_CHAR) || (s_SeqDBIsam_NullifyEOLs(P) == 0);
587 }
588
x_DiffChar(const string & term_in,const char * begin,const char * end,bool ignore_case)589 Int4 CSeqDBIsam::x_DiffChar(const string & term_in,
590 const char * begin,
591 const char * end,
592 bool ignore_case)
593 {
594 int result(-1);
595 int i(0);
596
597 const char * file_data = begin;
598 int bytes = int(end - begin);
599
600 for(i = 0; (i < bytes) && i < (int) term_in.size(); i++) {
601 char ch1 = term_in[i];
602 char ch2 = file_data[i];
603
604 if (ch1 != ch2) {
605 ch1 = s_SeqDBIsam_NullifyEOLs(ch1);
606 ch2 = s_SeqDBIsam_NullifyEOLs(ch2);
607
608 if (ignore_case) {
609 ch1 = toupper((unsigned char) ch1);
610 ch2 = toupper((unsigned char) ch2);
611 }
612
613 if (ch1 != ch2) {
614 break;
615 }
616 }
617 }
618
619 const char * p = file_data + i;
620
621 while((p < end) && ((*p) == ' ')) {
622 p++;
623 }
624
625 if (((p == end) || ENDS_ISAM_KEY(*p)) && (i == (int) term_in.size())) {
626 result = -1;
627 } else {
628 result = i;
629 }
630
631 return result;
632 }
633
x_ExtractPageData(const string & term_in,TIndx page_index,const char * beginp,const char * endp,vector<TIndx> & indices_out,vector<string> & keys_out,vector<string> & data_out)634 void CSeqDBIsam::x_ExtractPageData(const string & term_in,
635 TIndx page_index,
636 const char * beginp,
637 const char * endp,
638 vector<TIndx> & indices_out,
639 vector<string> & keys_out,
640 vector<string> & data_out)
641 {
642 // Collect all 'good' data from the page.
643
644 bool ignore_case = true;
645
646 Uint4 TermNum(0);
647
648 const char * indexp(beginp);
649 bool found_match(false);
650
651 while (indexp < endp) {
652 Int4 Diff = x_DiffChar(term_in,
653 indexp,
654 endp,
655 ignore_case);
656
657 if (Diff == -1) { // Complete match
658 found_match = true;
659
660 x_ExtractData(indexp,
661 endp,
662 keys_out,
663 data_out);
664
665 indices_out.push_back(page_index + TermNum);
666 } else {
667 // If we found a match, but the current term doesn't
668 // match, then we are past the set of matching entries.
669
670 if (found_match) {
671 break;
672 }
673 }
674
675 // Skip remainder of term, and any nulls after it.
676
677 while((indexp < endp) && s_SeqDBIsam_NullifyEOLs(*indexp)) {
678 indexp++;
679 }
680 while((indexp < endp) && (! s_SeqDBIsam_NullifyEOLs(*indexp))) {
681 indexp++;
682 }
683
684 TermNum++;
685 }
686 }
687
x_ExtractAllData(const string & term_in,TIndx sample_index,vector<TIndx> & indices_out,vector<string> & keys_out,vector<string> & data_out)688 void CSeqDBIsam::x_ExtractAllData(const string & term_in,
689 TIndx sample_index,
690 vector<TIndx> & indices_out,
691 vector<string> & keys_out,
692 vector<string> & data_out)
693
694 {
695 // The object at sample_index is known to match; we will iterate
696 // over the surrounding values to see if they match as well. No
697 // assumptions about how many keys can match are made here.
698
699 bool ignore_case = true;
700
701 int pre_amt = 1;
702 int post_amt = 1;
703
704 bool done_b(false), done_e(false);
705
706 const char * beginp(0);
707 const char * endp(0);
708
709 TIndx beg_off(0);
710 TIndx end_off(0);
711
712 while(! (done_b && done_e)) {
713 if (sample_index < pre_amt) {
714 beg_off = 0;
715 done_b = true;
716 } else {
717 beg_off = sample_index - pre_amt;
718 }
719
720 if ((m_NumSamples - sample_index) < post_amt) {
721 end_off = m_NumSamples;
722 done_e = true;
723 } else {
724 end_off = sample_index + post_amt;
725 }
726
727 x_LoadPage(beg_off, end_off, & beginp, & endp);
728
729 if (! done_b) {
730 Int4 diff_begin = x_DiffChar(term_in,
731 beginp,
732 endp,
733 ignore_case);
734
735 if (diff_begin != -1) {
736 done_b = true;
737 } else {
738 pre_amt ++;
739 }
740 }
741
742 if (! done_e) {
743 const char * last_term(0);
744 const char * p(endp-1);
745
746 // Skip over any non-terminating junk at the end
747
748 enum { eEndNulls, eLastTerm } search_stage = eEndNulls;
749
750 while(p > beginp) {
751 bool terminal = (0 == s_SeqDBIsam_NullifyEOLs(*p));
752
753 if (search_stage == eEndNulls) {
754 if (! terminal) {
755 search_stage = eLastTerm;
756 }
757 } else {
758 if (terminal) {
759 last_term = p + 1;
760 break;
761 }
762 }
763
764 p--;
765 }
766
767 if (! last_term) {
768 last_term = beginp;
769 }
770
771 Int4 diff_end = x_DiffChar(term_in,
772 last_term,
773 endp,
774 ignore_case);
775
776 if (diff_end != -1) {
777 done_e = true;
778 } else {
779 post_amt ++;
780 }
781 }
782 }
783
784 x_ExtractPageData(term_in,
785 m_PageSize * beg_off,
786 beginp,
787 endp,
788 indices_out,
789 keys_out,
790 data_out);
791 }
792
x_ExtractData(const char * key_start,const char * map_end,vector<string> & keys_out,vector<string> & data_out)793 void CSeqDBIsam::x_ExtractData(const char * key_start,
794 const char * map_end,
795 vector<string> & keys_out,
796 vector<string> & data_out)
797 {
798 const char * data_ptr(0);
799 const char * p(key_start);
800
801 while(p < map_end) {
802 switch(s_SeqDBIsam_NullifyEOLs(*p)) {
803 case 0:
804 if (data_ptr) {
805 keys_out.push_back(string(key_start, data_ptr));
806 data_out.push_back(string(data_ptr+1, p));
807 } else {
808 keys_out.push_back(string(key_start, p));
809 data_out.push_back("");
810 }
811 return;
812
813 case ISAM_DATA_CHAR:
814 data_ptr = p;
815
816 default:
817 p++;
818 }
819 }
820 }
821
822 CSeqDBIsam::TIndx
x_GetIndexKeyOffset(TIndx sample_offset,Uint4 sample_num)823 CSeqDBIsam::x_GetIndexKeyOffset(TIndx sample_offset,
824 Uint4 sample_num)
825
826 {
827 TIndx offset_begin = sample_offset + (sample_num * sizeof(Uint4));
828 //TIndx offset_end = offset_begin + sizeof(Uint4);
829
830
831 Int4 * key_offset_addr = (Int4 *)m_IndexLease.GetFileDataPtr(offset_begin);
832 return SeqDB_GetStdOrd(key_offset_addr);
833 }
834
835 void
x_GetIndexString(TIndx key_offset,int length,string & str,bool trim_to_null)836 CSeqDBIsam::x_GetIndexString(TIndx key_offset,
837 int length,
838 string & str,
839 bool trim_to_null)
840
841 {
842 //TIndx offset_end = key_offset + length;
843
844 const char * key_offset_addr =
845 (const char *)m_IndexLease.GetFileDataPtr(key_offset);
846
847
848 if (trim_to_null) {
849 for(int i = 0; i<length; i++) {
850 if (! key_offset_addr[i]) {
851 length = i;
852 break;
853 }
854 }
855 }
856
857 str.assign(key_offset_addr, length);
858 }
859
860 // Given an index, this computes the diff from the input term. It
861 // also returns the offset for that sample's key in KeyOffset.
862
x_DiffSample(const string & term_in,Uint4 SampleNum,TIndx & KeyOffset)863 int CSeqDBIsam::x_DiffSample(const string & term_in,
864 Uint4 SampleNum,
865 TIndx & KeyOffset)
866
867 {
868 // Meaning:
869 // a. Compute SampleNum*4
870 // b. Address this number into SamplePos (indexlease)
871 // c. Swap this number to compute Key offset.
872 // d. Add to beginning of file to get key data pointer.
873
874 bool ignore_case(true);
875
876 TIndx SampleOffset(m_KeySampleOffset);
877
878 if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) {
879 SampleOffset += (m_NumSamples + 1) * sizeof(Uint4);
880 }
881
882 TIndx offset_begin = SampleOffset + (SampleNum * sizeof(Uint4));
883 //TIndx offset_end = offset_begin + sizeof(Uint4);
884
885 KeyOffset = SeqDB_GetStdOrd((Int4*)m_IndexLease.GetFileDataPtr(offset_begin));
886
887 Uint4 max_lines_2 = m_MaxLineSize * 2;
888
889 return x_DiffCharLease(term_in,
890 m_IndexLease,
891 m_IndexFname,
892 m_IndexFileLength,
893 max_lines_2,
894 KeyOffset,
895 ignore_case);
896
897 }
898
x_LoadPage(TIndx SampleNum1,TIndx SampleNum2,const char ** beginp,const char ** endp)899 void CSeqDBIsam::x_LoadPage(TIndx SampleNum1,
900 TIndx SampleNum2,
901 const char ** beginp,
902 const char ** endp)
903
904 {
905 // Load the appropriate page of terms into memory.
906
907 _ASSERT(SampleNum2 > SampleNum1);
908
909 TIndx begin_offset = m_KeySampleOffset + SampleNum1 * sizeof(Uint4);
910 //TIndx end_offset = m_KeySampleOffset + (SampleNum2 + 1) * sizeof(Uint4);
911
912 Uint4 * key_offsets((Uint4*)m_IndexLease.GetFileDataPtr(begin_offset));
913
914
915 Uint4 key_off1 = SeqDB_GetStdOrd(& key_offsets[0]);
916 Uint4 key_off2 = SeqDB_GetStdOrd(& key_offsets[SampleNum2 - SampleNum1]);
917
918 *beginp = (const char *) m_DataLease.GetFileDataPtr(m_DataFname,key_off1);
919 *endp = (const char *) m_DataLease.GetFileDataPtr(key_off2);
920 }
921
922
923 // ------------------------StringSearch--------------------------
924 // Purpose: Main search function of string search.
925 //
926 // Parameters: Key - interer to search
927 // Data - returned value
928 // Index - internal index in database
929 // Returns: ISAM Error Code
930 // NOTE: None
931 // --------------------------------------------------------------
932
933 CSeqDBIsam::EErrorCode
x_StringSearch(const string & term_in,vector<string> & terms_out,vector<string> & values_out,vector<TIndx> & indices_out)934 CSeqDBIsam::x_StringSearch(const string & term_in,
935 vector<string> & terms_out,
936 vector<string> & values_out,
937 vector<TIndx> & indices_out)
938
939 {
940 // These are always false; They may relate to the prior find_one /
941 // expand_to_many method of getting multiple OIDs.
942
943 bool short_match(false);
944 bool follow_match(false);
945
946 size_t preexisting_data_count = values_out.size();
947
948 if (m_Initialized == false) {
949 return eInitFailed;
950 }
951
952 if (x_OutOfBounds(term_in)) {
953 return eNotFound;
954 }
955
956 // We will set this option to avoid more complications
957 bool ignore_case = true;
958
959 // search the sample file first
960
961 TIndx Start(0);
962 TIndx Stop(m_NumSamples - 1);
963
964 int Length = (int) term_in.size();
965
966 TIndx SampleOffset(m_KeySampleOffset);
967
968 if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) {
969 SampleOffset += (m_NumSamples + 1) * sizeof(Uint4);
970 }
971
972 int found_short(-1);
973
974 string short_term;
975 int SampleNum(-1);
976
977 while(Stop >= Start) {
978 SampleNum = ((Uint4)(Stop + Start)) >> 1;
979
980 TIndx KeyOffset(0);
981
982 int diff = x_DiffSample(term_in, SampleNum, KeyOffset);
983
984 // If this is an exact match, return the master term number.
985
986 const char * KeyData = (const char *)m_IndexLease.GetFileDataPtr(KeyOffset);
987 TIndx BytesToEnd = m_IndexFileLength - KeyOffset;
988
989 Uint4 max_lines_2 = m_MaxLineSize * 2;
990
991 if (BytesToEnd > (TIndx) max_lines_2) {
992 BytesToEnd = max_lines_2;
993 }
994
995 if (diff == -1) {
996 x_ExtractAllData(term_in,
997 SampleNum,
998 indices_out,
999 terms_out,
1000 values_out);
1001
1002
1003 return eNoError;
1004 }
1005
1006 // If the key is a superset of the sample term, backup until
1007 // just before the term.
1008
1009 if (short_match && (diff >= Length)) {
1010 if (SampleNum > 0)
1011 SampleNum--;
1012
1013 while(SampleNum > 0) {
1014 TIndx key_offset =
1015 x_GetIndexKeyOffset(SampleOffset,
1016 SampleNum);
1017
1018
1019 string prefix;
1020 x_GetIndexString(key_offset, Length, prefix, false);
1021
1022 if (ignore_case) {
1023 if (NStr::CompareNocase(prefix, term_in) != 0) {
1024 break;
1025 }
1026 } else {
1027 if (prefix != term_in) {
1028 break;
1029 }
1030 }
1031
1032 SampleNum--;
1033 }
1034
1035 found_short = SampleNum + 1;
1036
1037 TIndx key_offset =
1038 x_GetIndexKeyOffset(SampleOffset,
1039 SampleNum + 1);
1040
1041
1042 string prefix;
1043 x_GetIndexString(key_offset, max_lines_2, short_term, true);
1044
1045 break;
1046 } else {
1047 // If preceding is desired, note the key.
1048
1049 if (follow_match) {
1050 found_short = SampleNum;
1051
1052 x_GetIndexString(KeyOffset, max_lines_2, short_term, true);
1053 }
1054 }
1055
1056 // Otherwise, search for the next sample.
1057
1058 if (ignore_case
1059 ? tolower((unsigned char) term_in[diff]) < tolower((unsigned char) KeyData[diff])
1060 : term_in[diff] < KeyData[diff]) {
1061 Stop = --SampleNum;
1062 } else {
1063 Start = SampleNum + 1;
1064 }
1065 }
1066
1067
1068 // If the term is out of range altogether, report not finding it.
1069
1070 if ( (SampleNum < 0) || (SampleNum >= m_NumSamples)) {
1071 return eNotFound;
1072 }
1073
1074 // Load the appropriate page of terms into memory.
1075
1076 const char * beginp(0);
1077 const char * endp(0);
1078
1079 x_LoadPage(SampleNum, SampleNum + 1, & beginp, & endp);
1080
1081 // Search the page for the term.
1082
1083 x_ExtractPageData(term_in,
1084 m_PageSize * SampleNum,
1085 beginp,
1086 endp,
1087 indices_out,
1088 terms_out,
1089 values_out);
1090
1091 // For now the short and follow logic is not implemented.
1092
1093 EErrorCode rv(eNoError);
1094
1095 if (preexisting_data_count == values_out.size()) {
1096 rv = eNotFound;
1097 }
1098
1099 return rv;
1100 }
1101
CSeqDBIsam(CSeqDBAtlas & atlas,const string & dbname,char prot_nucl,char file_ext_char,ESeqDBIdType ident_type)1102 CSeqDBIsam::CSeqDBIsam(CSeqDBAtlas & atlas,
1103 const string & dbname,
1104 char prot_nucl,
1105 char file_ext_char,
1106 ESeqDBIdType ident_type)
1107 : m_Atlas (atlas),
1108 m_IdentType (ident_type),
1109 m_IndexLease (atlas),
1110 m_DataLease (atlas),
1111 m_Type (eNumeric),
1112 m_NumTerms (0),
1113 m_NumSamples (0),
1114 m_PageSize (0),
1115 m_MaxLineSize (0),
1116 m_IdxOption (0),
1117 m_Initialized (false),
1118 m_KeySampleOffset(0),
1119 m_TestNonUnique (true),
1120 m_FileStart (0),
1121 m_FirstOffset (0),
1122 m_LastOffset (0),
1123 m_LongId (false),
1124 m_TermSize (8)
1125 {
1126 // These are the types that readdb.c seems to use.
1127
1128 switch(ident_type) {
1129 case eGiId:
1130 case ePigId:
1131 case eTiId:
1132 m_Type = eNumeric;
1133 break;
1134
1135 case eStringId:
1136 case eHashId:
1137 m_Type = eString;
1138 break;
1139
1140 default:
1141 NCBI_THROW(CSeqDBException,
1142 eArgErr,
1143 "Error: ident type argument not valid");
1144 }
1145
1146 x_MakeFilenames(dbname,
1147 prot_nucl,
1148 file_ext_char,
1149 m_IndexFname,
1150 m_DataFname);
1151
1152 if (! (CFile(m_IndexFname).Exists() &&
1153 CFile(m_DataFname).Exists()) ) {
1154
1155 string msg("Error: Could not open input file (");
1156 msg += m_IndexFname + "/" + m_DataFname + ")";
1157 NCBI_THROW(CSeqDBException, eFileErr, msg);
1158 }
1159 m_IndexLease.Init(m_IndexFname);
1160 m_DataLease.Init(m_DataFname);
1161 if(m_Type == eNumeric) {
1162 m_PageSize = DEFAULT_NISAM_SIZE;
1163 } else {
1164 m_PageSize = DEFAULT_SISAM_SIZE;
1165 }
1166 if (eNoError !=x_InitSearch()) {
1167 m_Initialized = false;
1168 }
1169 x_FindIndexBounds();
1170 }
1171
x_MakeFilenames(const string & dbname,char prot_nucl,char file_ext_char,string & index_name,string & data_name)1172 void CSeqDBIsam::x_MakeFilenames(const string & dbname,
1173 char prot_nucl,
1174 char file_ext_char,
1175 string & index_name,
1176 string & data_name)
1177 {
1178 if (dbname.empty() ||
1179 (! isalpha((unsigned char) prot_nucl)) ||
1180 (! isalpha((unsigned char) file_ext_char))) {
1181
1182 NCBI_THROW(CSeqDBException,
1183 eArgErr,
1184 "Error: argument not valid");
1185 }
1186
1187 index_name.reserve(dbname.size() + 4);
1188 data_name.reserve(dbname.size() + 4);
1189
1190 index_name = dbname;
1191 index_name += '.';
1192 index_name += prot_nucl;
1193 index_name += file_ext_char;
1194
1195 data_name = index_name;
1196 index_name += 'i';
1197 data_name += 'd';
1198 }
1199
IndexExists(const string & dbname,char prot_nucl,char file_ext_char)1200 bool CSeqDBIsam::IndexExists(const string & dbname,
1201 char prot_nucl,
1202 char file_ext_char)
1203 {
1204 string iname, dname;
1205 x_MakeFilenames(dbname, prot_nucl, file_ext_char, iname, dname);
1206
1207 return CFile(iname).Exists() && CFile(dname).Exists();
1208 }
1209
~CSeqDBIsam()1210 CSeqDBIsam::~CSeqDBIsam()
1211 {
1212 UnLease();
1213 }
1214 //Remove this
UnLease()1215 void CSeqDBIsam::UnLease()
1216 {
1217 m_IndexLease.Clear();
1218 m_DataLease.Clear();
1219 }
1220
x_IdentToOid(Int8 ident,TOid & oid)1221 bool CSeqDBIsam::x_IdentToOid(Int8 ident, TOid & oid)
1222 {
1223 EErrorCode err =
1224 x_NumericSearch(ident, & oid, 0);
1225
1226 if (err == eNoError) {
1227 return true;
1228 }
1229
1230 oid = -1u; /* NCBI_FAKE_WARNING */
1231
1232 return false;
1233 }
1234
StringToOids(const string & acc,vector<TOid> & oids,bool adjusted,bool & version_check)1235 void CSeqDBIsam::StringToOids(const string & acc,
1236 vector<TOid> & oids,
1237 bool adjusted,
1238 bool & version_check)
1239
1240 {
1241 bool strip_version = version_check;
1242 version_check = false;
1243
1244 _ASSERT(m_IdentType == eStringId);
1245
1246 if(m_Initialized == false) {
1247 return;
1248 }
1249
1250 bool found = false;
1251
1252 string accession(string("gb|") + acc + "|");
1253 string locus_str(string("gb||") + acc);
1254
1255 EErrorCode err = eNoError;
1256
1257 vector<string> keys_out;
1258 vector<string> data_out;
1259 vector<TIndx> indices_out;
1260
1261 if (! adjusted) {
1262 if ((err = x_StringSearch(accession,
1263 keys_out,
1264 data_out,
1265 indices_out)) < 0) {
1266 return;
1267 }
1268
1269 if (err == eNoError) {
1270 found = true;
1271 }
1272
1273 if ((! found) &&
1274 (err = x_StringSearch(locus_str,
1275 keys_out,
1276 data_out,
1277 indices_out)) < 0) {
1278
1279 return;
1280 }
1281
1282 if (err != eNotFound) {
1283 found = true;
1284 }
1285 }
1286
1287 if ((! found) &&
1288 (err = x_StringSearch(acc,
1289 keys_out,
1290 data_out,
1291 indices_out)) < 0) {
1292
1293
1294 return;
1295 }
1296
1297 if (err != eNotFound) {
1298 found = true;
1299 }
1300
1301 if ((! found) && strip_version) {
1302 size_t pos = acc.find(".");
1303
1304 bool is_version = false;
1305
1306 if (pos != string::npos) {
1307 int ver_len = acc.size() - pos - 1;
1308
1309 is_version = (ver_len <= 3 && ver_len >= 1);
1310
1311 for(size_t vp = pos+1; vp < acc.size(); vp++) {
1312 if (! isdigit(acc[vp])) {
1313 is_version = false;
1314 break;
1315 }
1316 }
1317 }
1318
1319 if (is_version) {
1320 string nover(acc, 0, pos);
1321
1322 err = x_StringSearch(nover,
1323 keys_out,
1324 data_out,
1325 indices_out);
1326
1327
1328 if (data_out.size()) {
1329 version_check = true;
1330 }
1331
1332 if (err < 0) {
1333 return;
1334 }
1335 }
1336 }
1337
1338 if (err != eNotFound) {
1339 found = true;
1340 }
1341
1342 if (! found) {
1343 // Use CSeq_id to parse the id string and build a replacement,
1344 // FASTA type string. This allows some IDs, such as PDBs with
1345 // chains, such as '1qcfA' to be parsed.
1346
1347 string id;
1348
1349 try {
1350 CSeq_id seqid(acc, CSeq_id::fParse_RawText | CSeq_id::fParse_AnyLocal);
1351 id = seqid.AsFastaString();
1352 }
1353 catch(CSeqIdException &) {
1354 }
1355
1356 if (id.size() &&
1357 ((err = x_StringSearch(id,
1358 keys_out,
1359 data_out,
1360 indices_out)) < 0)) {
1361
1362 return;
1363 }
1364 }
1365
1366 if (err != eNotFound) {
1367 found = true;
1368 }
1369
1370 if (found) {
1371 ITERATE(vector<string>, iter, data_out) {
1372 oids.push_back(atoi((*iter).c_str()));
1373 }
1374 }
1375 }
1376
x_SparseStringToOids(const string &,vector<int> &,bool)1377 bool CSeqDBIsam::x_SparseStringToOids(const string &,
1378 vector<int> &,
1379 bool)
1380
1381 {
1382 cerr << " this should be derived from readdb_acc2fastaEx().." << endl;
1383 _TROUBLE;
1384 return false;
1385 }
1386
IdsToOids(int vol_start,int vol_end,CSeqDBGiList & ids)1387 void CSeqDBIsam::IdsToOids(int vol_start,
1388 int vol_end,
1389 CSeqDBGiList & ids)
1390
1391
1392 {
1393 // The vol_start parameter is needed because translations in the
1394 // GI list should refer to global OIDs, not per-volume OIDs.
1395
1396 switch (m_IdentType) {
1397 case eGiId:
1398 x_TranslateGiList<TGi>(vol_start, ids);
1399 break;
1400
1401 case eTiId:
1402 x_TranslateGiList<TTi>(vol_start, ids);
1403 break;
1404
1405 case eStringId:
1406 x_TranslateGiList<string>(vol_start, ids);
1407 break;
1408
1409 case ePigId:
1410 x_TranslateGiList<TPig>(vol_start, ids);
1411 break;
1412
1413 default:
1414 NCBI_THROW(CSeqDBException,
1415 eArgErr,
1416 "Error: Wrong type of idlist specified.");
1417 }
1418 }
1419
IdsToOids(int vol_start,int vol_end,CSeqDBNegativeList & ids)1420 void CSeqDBIsam::IdsToOids(int vol_start,
1421 int vol_end,
1422 CSeqDBNegativeList & ids)
1423
1424
1425 {
1426 // The vol_start parameter is needed because translations in the
1427 // GI list should refer to global OIDs, not per-volume OIDs.
1428
1429 _ASSERT(m_IdentType == eGiId || m_IdentType == eTiId || m_IdentType == eStringId);
1430
1431 //m_Atlas.Lock(locked);
1432
1433 ids.InsureOrder();
1434
1435 if ((m_IdentType == eGiId) && ids.GetNumGis()) {
1436 x_SearchNegativeMulti(vol_start,
1437 vol_end,
1438 ids,
1439 false);
1440
1441 }
1442
1443 if ((m_IdentType == eTiId) && ids.GetNumTis()) {
1444 x_SearchNegativeMulti(vol_start,
1445 vol_end,
1446 ids,
1447 true);
1448
1449 }
1450
1451 if(m_IdentType == eStringId && ids.GetNumSis()) {
1452 x_SearchNegativeMultiSeq(vol_start,
1453 vol_end,
1454 ids);
1455 //true,
1456
1457 }
1458 }
1459
x_FindIndexBounds()1460 void CSeqDBIsam::x_FindIndexBounds()
1461 {
1462 Int4 Start (0);
1463 Int4 Stop (m_NumSamples - 1);
1464
1465 //m_Atlas.Lock(locked);
1466
1467
1468 if (m_Type == eNumeric) {
1469 //
1470 // Get first key from data file
1471
1472 int num_elements(0);
1473 int start(0);
1474 const void * data_page(0);
1475
1476 x_MapDataPage(Start,
1477 start,
1478 num_elements,
1479 & data_page);
1480
1481
1482 _ASSERT(num_elements);
1483
1484 int elem_index = 0;
1485
1486 Int8 data_gi(0);
1487 int data_oid(-1);
1488
1489 x_GetDataElement(data_page,
1490 elem_index,
1491 data_gi,
1492 data_oid);
1493
1494 m_FirstKey.SetNumeric(data_gi);
1495
1496
1497 //
1498 // Get last key from data file
1499
1500 x_MapDataPage(Stop,
1501 start,
1502 num_elements,
1503 & data_page);
1504
1505
1506 _ASSERT(num_elements);
1507
1508 elem_index = num_elements - 1;
1509
1510 x_GetDataElement(data_page,
1511 elem_index,
1512 data_gi,
1513 data_oid);
1514
1515 m_LastKey.SetNumeric(data_gi);
1516 } else {
1517 //
1518 // Load the appropriate page of terms into memory.
1519
1520 const char * beginp(0);
1521 const char * endp(0);
1522
1523 //
1524 // Load the first page
1525
1526 x_LoadPage(Start, Start + 1, & beginp, & endp);
1527
1528 // Get first term
1529
1530 vector<string> keys_out;
1531 vector<string> data_out; // not used
1532
1533 x_ExtractData(beginp,
1534 endp,
1535 keys_out,
1536 data_out);
1537
1538 x_Lower(keys_out.front());
1539 m_FirstKey.SetString(keys_out.front());
1540
1541
1542 //
1543 // Load the last page
1544
1545 x_LoadPage(Stop, Stop + 1, & beginp, & endp);
1546
1547 // Advance to last item
1548
1549 const char * lastp(0);
1550 const char * indexp(beginp);
1551
1552 while (indexp < endp) {
1553 // Remember our new "last term" value.
1554
1555 lastp = indexp;
1556
1557 // Skip remainder of term, and any nulls after it.
1558
1559 while((indexp < endp) && s_SeqDBIsam_NullifyEOLs(*indexp)) {
1560 indexp++;
1561 }
1562 while((indexp < endp) && (! s_SeqDBIsam_NullifyEOLs(*indexp))) {
1563 indexp++;
1564 }
1565 }
1566
1567 // Get the last key
1568
1569 _ASSERT(lastp);
1570
1571 keys_out.clear();
1572 data_out.clear();
1573
1574 x_ExtractData(lastp,
1575 endp,
1576 keys_out,
1577 data_out);
1578
1579 x_Lower(keys_out.front());
1580 m_LastKey.SetString(keys_out.front());
1581 }
1582 }
1583
x_OutOfBounds(Int8 key)1584 bool CSeqDBIsam::x_OutOfBounds(Int8 key)
1585 {
1586 if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
1587 return false;
1588 }
1589
1590 _ASSERT(m_Type == eNumeric);
1591
1592 if (m_FirstKey.OutsideFirstBound(key)) {
1593 return true;
1594 }
1595
1596 if (m_LastKey.OutsideLastBound(key)) {
1597 return true;
1598 }
1599
1600 return false;
1601 }
1602
x_OutOfBounds(string key)1603 bool CSeqDBIsam::x_OutOfBounds(string key)
1604 {
1605 if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
1606 return false;
1607 }
1608
1609 _ASSERT(m_Type == eString);
1610
1611 x_Lower(key);
1612
1613 if (m_FirstKey.OutsideFirstBound(key)) {
1614 return true;
1615 }
1616
1617 if (m_LastKey.OutsideLastBound(key)) {
1618 return true;
1619 }
1620
1621 return false;
1622 }
1623
GetIdBounds(Int8 & low_id,Int8 & high_id,int & count)1624 void CSeqDBIsam::GetIdBounds(Int8 & low_id,
1625 Int8 & high_id,
1626 int & count)
1627
1628
1629 {
1630 if(m_Initialized == false) {
1631 count = 0;
1632 return;
1633 }
1634
1635 if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
1636 count = 0;
1637 return;
1638 }
1639
1640 low_id = m_FirstKey.GetNumeric();
1641 high_id = m_LastKey.GetNumeric();
1642 count = m_NumTerms;
1643 }
1644
GetIdBounds(string & low_id,string & high_id,int & count)1645 void CSeqDBIsam::GetIdBounds(string & low_id,
1646 string & high_id,
1647 int & count)
1648
1649
1650 {
1651 if(m_Initialized == false) {
1652 count = 0;
1653 return;
1654 }
1655
1656 if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
1657 count = 0;
1658 return;
1659 }
1660
1661 low_id = m_FirstKey.GetString();
1662 high_id = m_LastKey.GetString();
1663 count = m_NumTerms;
1664 }
1665
HashToOids(unsigned hash,vector<TOid> & oids)1666 void CSeqDBIsam::HashToOids(unsigned hash,
1667 vector<TOid> & oids)
1668
1669
1670 {
1671 _ASSERT(m_IdentType == eHashId);
1672 if(m_Initialized == false) {
1673 return;
1674 }
1675
1676 bool found = false;
1677
1678 string key(NStr::UIntToString(hash));
1679
1680 EErrorCode err = eNoError;
1681
1682 vector<string> keys_out;
1683 vector<string> data_out;
1684 vector<TIndx> indices_out;
1685
1686 if ((err = x_StringSearch(key,
1687 keys_out,
1688 data_out,
1689 indices_out)) < 0) {
1690
1691 return;
1692 }
1693
1694 if (err != eNotFound) {
1695 found = true;
1696 }
1697
1698 if (found) {
1699 ITERATE(vector<string>, iter, data_out) {
1700 oids.push_back(atoi(iter->c_str()));
1701 }
1702 }
1703 }
1704
1705 END_NCBI_SCOPE
1706
1707