1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Amelia Fong
27 *
28 */
29
#include <ncbi_pch.hpp>
#include <objtools/blast/seqdb_reader/seqidlist_reader.hpp>
#include <objects/seqloc/Seq_id.hpp>

#include <cstring>
33
34
35 BEGIN_NCBI_SCOPE
36 USING_SCOPE(objects);
37
38
39 class CSeqidlistRead
40 {
41 public:
42 CSeqidlistRead (CMemoryFile & file);
43
GetListInfo(SBlastSeqIdListInfo & info)44 void GetListInfo(SBlastSeqIdListInfo & info){ info = m_info;};
45 int GetIds(vector<CSeqDBGiList::SSiOid> & idlist);
46
47 private:
x_GetUint8()48 inline Uint8 x_GetUint8() { Uint8 rv= *((Uint8 *) m_Ptr); m_Ptr +=8; return rv;}
x_GetUint4()49 inline Uint4 x_GetUint4() { Uint4 rv= *((Uint4 *) m_Ptr); m_Ptr +=4; return rv;}
x_GetChar()50 inline char x_GetChar() {char rv = *m_Ptr; m_Ptr++; return rv;}
x_GetString(string & rv,Uint4 len)51 inline void x_GetString(string & rv, Uint4 len) {rv.assign (m_Ptr, len); m_Ptr+= len;}
52
53 char * m_Ptr;
54 char * m_EndPtr;
55 SBlastSeqIdListInfo m_info;
56 };
57
CSeqidlistRead(CMemoryFile & file)58 CSeqidlistRead::CSeqidlistRead (CMemoryFile & file) : m_Ptr((char*) file.GetPtr()), m_EndPtr((char*) file.GetPtr()) {
59 if(m_Ptr == NULL) {
60 NCBI_THROW(CSeqDBException, eArgErr, "Failed to map seqidlist file ");
61 }
62
63 char null_byte = x_GetChar();
64 if (null_byte == 0) {
65 m_info.is_v4 = false;
66 Uint8 file_size = file.GetFileSize();
67 m_info.file_size = x_GetUint8();
68 if (m_info.file_size != file_size) {
69 NCBI_THROW(CSeqDBException, eArgErr, "Invalid seqidlist file");
70 }
71 m_EndPtr += file_size;
72 m_info.num_ids = x_GetUint8();
73 Uint4 title_length = x_GetUint4();
74 x_GetString(m_info.title, title_length);
75 char file_create_date_length = x_GetChar();
76 x_GetString(m_info.create_date, file_create_date_length);
77 m_info.db_vol_length = x_GetUint8();
78 if(m_info.db_vol_length != 0) {
79 char file_db_create_date_length = x_GetChar();
80 x_GetString(m_info.db_create_date, file_db_create_date_length);
81 Uint4 file_vol_names_length = x_GetUint4();
82 x_GetString(m_info.db_vol_names, file_vol_names_length);
83 }
84 }
85 }
86
GetIds(vector<CSeqDBGiList::SSiOid> & idlist)87 int CSeqidlistRead::GetIds(vector<CSeqDBGiList::SSiOid> & idlist)
88 {
89 const unsigned char byte_max = 0xFF;
90 unsigned int i = 0;
91 idlist.clear();
92 idlist.resize(m_info.num_ids);
93 for(; (m_Ptr < m_EndPtr) && (i < m_info.num_ids); i++) {
94 unsigned char id_len = (unsigned char) x_GetChar();
95 if(id_len == byte_max) {
96 Uint4 long_id_len = x_GetUint4();
97 x_GetString(idlist[i].si, long_id_len);
98 }
99 else {
100 x_GetString(idlist[i].si, id_len);
101 }
102 }
103 if(i != m_info.num_ids) {
104 NCBI_THROW(CSeqDBException, eArgErr, "Invalid total num of ids in seqidlist file");
105 }
106
107 return i;
108 }
109
110
GetSeqidlist(CMemoryFile & file,vector<CSeqDBGiList::SSiOid> & idlist,SBlastSeqIdListInfo & list_info)111 int CBlastSeqidlistFile::GetSeqidlist(CMemoryFile & file, vector<CSeqDBGiList::SSiOid> & idlist,
112 SBlastSeqIdListInfo & list_info)
113 {
114
115 CSeqidlistRead list(file);
116 list.GetListInfo(list_info);
117 list.GetIds(idlist);
118
119 return list_info.num_ids;
120 }
121
GetSeqidlistInfo(const string & filename,SBlastSeqIdListInfo & list_info)122 int CBlastSeqidlistFile::GetSeqidlistInfo(const string & filename, SBlastSeqIdListInfo & list_info)
123 {
124 string file = SeqDB_ResolveDbPath(filename);
125 CMemoryFile in(file);
126 CSeqidlistRead list(in);
127 list.GetListInfo(list_info);
128 return list_info.num_ids;
129
130 }
131
PrintSeqidlistInfo(const string & filename,CNcbiOstream & os)132 void CBlastSeqidlistFile::PrintSeqidlistInfo(const string & filename, CNcbiOstream & os)
133 {
134 SBlastSeqIdListInfo list_info;
135 if (CBlastSeqidlistFile::GetSeqidlistInfo(filename, list_info) > 0) {
136 os <<"Num of Ids: " << list_info.num_ids << "\n";
137 os <<"Title: " << list_info.title << "\n";
138 os <<"Create Date: " << list_info.create_date << "\n";
139 if(list_info.db_vol_length > 0) {
140 os << "DB Info: \n";
141 os << "\t" << "Total Vol Length: " << list_info.db_vol_length << "\n";
142 os << "\t" << "DB Create Date: " << list_info.db_create_date << "\n";
143 os << "\t" << "DB Vols: ";
144 vector<string> vols;
145 NStr::Split(list_info.db_vol_names, " ", vols);
146 for(unsigned int i=0; i < vols.size(); i ++ ) {
147 os << "\n\t\t" << vols[i];
148 }
149 }
150 }
151 else {
152 os << "Seqidlist file is not in blast db version 5 format";
153 }
154 os << endl;
155 }
156
157 END_NCBI_SCOPE
158