1 /* $Id: cn3d_cache.cpp 620017 2020-11-13 19:16:01Z hurwitz $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * implements a basic cache for structures
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36
37 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
38 #include <objects/ncbimime/Biostruc_seq.hpp>
39 #include <objects/seqset/Seq_entry.hpp>
40 #include <objects/seqset/Bioseq_set.hpp>
41 #include <objects/mmdb1/Biostruc_id.hpp>
42 #include <objects/mmdb1/Mmdb_id.hpp>
43
44 #include "remove_header_conflicts.hpp"
45
46 // for file/directory manipulation stuff
47 #ifdef __WXMSW__
48 #include <windows.h>
49 #include <wx/msw/winundef.h>
50 #endif
51 #include <wx/wx.h>
52 #include <wx/datetime.h>
53 #include <wx/file.h>
54 #include <wx/filename.h>
55
56 #include "cn3d_cache.hpp"
57 #include "cn3d_tools.hpp"
58 #include "asn_reader.hpp"
59
60 USING_NCBI_SCOPE;
61 USING_SCOPE(objects);
62
63
BEGIN_SCOPE(Cn3D)64 BEGIN_SCOPE(Cn3D)
65
66 static string GetCacheFilePath(int mmdbID, EModel_type modelType)
67 {
68 string cachePath;
69 if (RegistryGetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, &cachePath)) {
70 wxString cacheFile;
71 cacheFile.Printf("%s%c%i.%i", cachePath.c_str(), wxFILE_SEP_PATH, mmdbID, modelType);
72 cachePath = cacheFile.c_str();
73 } else
74 ERRORMSG("Can't get cache folder from registry");
75 return cachePath;
76 }
77
CreateCacheFolder(void)78 static bool CreateCacheFolder(void)
79 {
80 string cacheFolder;
81 if (!RegistryGetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, &cacheFolder)) return false;
82 if (wxDirExists(cacheFolder.c_str())) return true;
83 bool okay = wxMkdir(cacheFolder.c_str());
84 TRACEMSG((okay ? "created" : "failed to create") << " folder " << cacheFolder);
85 return okay;
86 }
87
ExtractBioseqs(list<CRef<CSeq_entry>> & seqEntries,BioseqRefList * sequences)88 static void ExtractBioseqs(list < CRef < CSeq_entry > >& seqEntries, BioseqRefList *sequences)
89 {
90 list < CRef < CSeq_entry > >::iterator e, ee = seqEntries.end();
91 for (e=seqEntries.begin(); e!=ee; ++e) {
92 if ((*e)->IsSeq())
93 sequences->push_back(CRef<CBioseq>(&((*e)->SetSeq())));
94 else
95 ExtractBioseqs((*e)->SetSet().SetSeq_set(), sequences);
96 }
97 }
98
ExtractBiostrucAndBioseqs(CNcbi_mime_asn1 & mime,CRef<CBiostruc> & biostruc,BioseqRefList * sequences)99 bool ExtractBiostrucAndBioseqs(CNcbi_mime_asn1& mime,
100 CRef < CBiostruc >& biostruc, BioseqRefList *sequences)
101 {
102 if (!mime.IsStrucseq()) {
103 ERRORMSG("ExtractBiostrucAndBioseqs() - expecting strucseq mime");
104 return false;
105 }
106
107 // copy mime's biostruc into existing object
108 biostruc.Reset(&(mime.SetStrucseq().SetStructure()));
109
110 // extract Bioseqs
111 if (sequences) {
112 sequences->clear();
113 ExtractBioseqs(mime.SetStrucseq().SetSequences(), sequences);
114 }
115
116 return true;
117 }
118
GetStructureFromCacheFolder(int mmdbID,EModel_type modelType)119 static CNcbi_mime_asn1 * GetStructureFromCacheFolder(int mmdbID, EModel_type modelType)
120 {
121 // try to load from cache
122 INFOMSG("looking for " << mmdbID << " (model type " << (int) modelType << ") in cache:");
123 string err, cacheFile = GetCacheFilePath(mmdbID, modelType);
124 CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1());
125 SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
126 bool gotFile = ReadASNFromFile(cacheFile.c_str(), mime.GetPointer(), true, &err);
127 SetDiagPostLevel(eDiag_Info);
128 if (!gotFile) {
129 WARNINGMSG("failed to load " << mmdbID
130 << " (model type " << (int) modelType << ") from cache: " << err);
131 return NULL;
132 }
133
134 // if successful, 'touch' the file to mark it as recently used
135 INFOMSG("loaded " << cacheFile);
136 wxFileName fn(cacheFile.c_str());
137 if (!fn.Touch())
138 WARNINGMSG("error touching " << cacheFile);
139
140 return mime.Release();
141 }
142
143 // If assemblyId = -1, use the predefined 'default' assembly.
144 // Otherwise, get the specific assembly requested, where
145 // assemblyId = 0 means the ASU, and PDB-defined assemblies
146 // are indexed sequentially from 1.
GetStructureViaHTTPAndAddToCache(const string & uid,int mmdbID,EModel_type modelType,int assemblyId=0)147 static CNcbi_mime_asn1 * GetStructureViaHTTPAndAddToCache(
148 const string& uid, int mmdbID, EModel_type modelType, int assemblyId = 0)
149 {
150 string host, path, args;
151
152 if (assemblyId == 0) {
153 // construct URL [mmdbsrv.cgi]
154
155 // this is for a test release for Gabi for testing long pdb chain ids. Dave 10/19/20.
156 // this is from Dachuan, showing what the test URL looks like, and an example.
157 // https://dev.ncbi.nlm.nih.gov/Structure/pdbtest/[mmdb|cdd|vast|vastplus|wrbsp]/[*].cgi
158 // https://dev.ncbi.nlm.nih.gov/Structure/pdbtest/mmdb/mmdbsrv.cgi
159 host = "dev.ncbi.nlm.nih.gov"; // *
160 path = "/Structure/pdbtest/mmdb/mmdbsrv.cgi"; // *
161
162 // this is the original, prior to making the test release for Gabi.
163 // host = "www.ncbi.nlm.nih.gov";
164 // path = "/Structure/mmdb/mmdbsrv.cgi";
165
166 args = "save=Save&dopt=j&uid=";
167 if (mmdbID > 0)
168 args += NStr::IntToString(mmdbID);
169 else // assume PDB id
170 args += uid;
171 args += "&Complexity=";
172 switch (modelType) {
173 case eModel_type_ncbi_all_atom: args += "3"; break;
174 case eModel_type_pdb_model: args += "4"; break;
175 case eModel_type_ncbi_backbone:
176 default:
177 args += "2"; break;
178 }
179 }
180
181 else {
182 // construct URL [mmdb_strview.cgi]
183 host = "www.ncbi.nlm.nih.gov";
184 path = "/Structure/mmdb/mmdb_strview.cgi";
185 args = "program=cn3d&display=1&uid=";
186 if (mmdbID > 0)
187 args += NStr::IntToString(mmdbID);
188 else // assume PDB id
189 args += uid;
190 args += "&complexity=";
191 switch (modelType) {
192 case eModel_type_ncbi_vector: args += "1"; break;
193 case eModel_type_ncbi_all_atom: args += "3"; break;
194 case eModel_type_pdb_model: args += "4"; break;
195 case eModel_type_ncbi_backbone:
196 default:
197 args += "2"; break;
198 }
199 args += "&buidx=" + NStr::IntToString(assemblyId);
200 }
201
202 // load from network
203 INFOMSG("Trying to load structure data from " << host << path << '?' << args);
204 string err;
205 CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1());
206
207 if (!GetAsnDataViaHTTPS(host, path, args, mime.GetPointer(), &err) ||
208 !mime->IsStrucseq()) {
209 ERRORMSG("Failed to read structure " << uid << " from network\nreason: " << err);
210 return NULL;
211
212 } else {
213 // get MMDB ID from biostruc if not already known
214 if (mmdbID == 0) {
215 if (mime->GetStrucseq().GetStructure().GetId().front()->IsMmdb_id())
216 mmdbID = mime->GetStrucseq().GetStructure().GetId().front()->GetMmdb_id().Get();
217 else {
218 ERRORMSG("Can't get MMDB ID from Biostruc!");
219 return mime.Release();
220 }
221 }
222
223 bool cacheEnabled;
224 if (RegistryGetBoolean(REG_CACHE_SECTION, REG_CACHE_ENABLED, &cacheEnabled) && cacheEnabled) {
225 // add to cache
226 if (CreateCacheFolder() &&
227 WriteASNToFile(GetCacheFilePath(mmdbID, modelType).c_str(), *mime, true, &err)) {
228 INFOMSG("stored " << mmdbID << " (model type " << (int) modelType << ") in cache");
229 // trim cache to appropriate size if we've added a new file
230 int size;
231 if (RegistryGetInteger(REG_CACHE_SECTION, REG_CACHE_MAX_SIZE, &size))
232 TruncateCache(size);
233 } else {
234 WARNINGMSG("Failed to write structure to cache folder");
235 if (err.size() > 0) WARNINGMSG("reason: " << err);
236 }
237 }
238 }
239
240 return mime.Release();
241 }
242
LoadStructureViaCache(const std::string & uid,ncbi::objects::EModel_type modelType,int assemblyId)243 CNcbi_mime_asn1 * LoadStructureViaCache(const std::string& uid, ncbi::objects::EModel_type modelType, int assemblyId)
244 {
245 // determine whether this is an integer MMDB ID or alphanumeric PDB ID
246 int mmdbID = 0;
247 if (uid.size() == 4 && (isalpha((unsigned char) uid[1]) || isalpha((unsigned char) uid[2]) || isalpha((unsigned char) uid[3]))) {
248 TRACEMSG("Fetching PDB " << uid);
249 } else { // mmdb id
250 unsigned long tmp;
251 if (wxString(uid.c_str()).ToULong(&tmp)) {
252 mmdbID = (int) tmp;
253 } else {
254 ERRORMSG("LoadStructureViaCache() - invalid uid " << uid);
255 return NULL;
256 }
257 TRACEMSG("Fetching MMDB " << mmdbID);
258 }
259
260 // try loading from local cache folder first, if cache enabled in registry (but only with known mmdb id)
261 bool cacheEnabled;
262 CNcbi_mime_asn1 *mime = NULL;
263 if (mmdbID > 0 &&
264 RegistryGetBoolean(REG_CACHE_SECTION, REG_CACHE_ENABLED, &cacheEnabled) &&
265 cacheEnabled)
266 mime = GetStructureFromCacheFolder(mmdbID, modelType);
267
268 // otherwise, load via HTTP (and save in cache folder)
269 if (!mime)
270 mime = GetStructureViaHTTPAndAddToCache(uid, mmdbID, modelType, assemblyId);
271
272 return mime;
273 }
274
LoadStructureViaCache(const std::string & uid,ncbi::objects::EModel_type modelType,int assemblyId,CRef<CBiostruc> & biostruc,BioseqRefList * sequences)275 bool LoadStructureViaCache(const std::string& uid, ncbi::objects::EModel_type modelType, int assemblyId,
276 CRef < CBiostruc >& biostruc, BioseqRefList *sequences)
277 {
278 CRef < CNcbi_mime_asn1 > mime(LoadStructureViaCache(uid, modelType, assemblyId));
279
280 // debugging
281 // string errStr;
282 // WriteASNToFile("mime_data.txt", mime.GetObject(), false, &errStr);
283
284 return (mime.NotEmpty() && ExtractBiostrucAndBioseqs(*mime, biostruc, sequences));
285 }
286
TruncateCache(unsigned int maxSize)287 void TruncateCache(unsigned int maxSize)
288 {
289 string cacheFolder;
290 if (!RegistryGetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, &cacheFolder) ||
291 !wxDirExists(cacheFolder.c_str())) {
292 WARNINGMSG("can't find cache folder");
293 return;
294 }
295 INFOMSG("truncating cache to " << maxSize << " MB");
296
297 wxString cacheFolderFiles;
298 cacheFolderFiles.Printf("%s%c*", cacheFolder.c_str(), wxFILE_SEP_PATH);
299
300 // empty directory if maxSize <= 0
301 if (maxSize <= 0) {
302 wxString f;
303 while ((f=wxFindFirstFile(cacheFolderFiles, wxFILE)).size() > 0) {
304 if (!wxRemoveFile(f))
305 WARNINGMSG("can't remove file " << f);
306 }
307 return;
308 }
309
310 // otherwise, add up file sizes and keep deleting oldest until total size <= max
311 unsigned long totalSize = 0;
312 wxString oldestFileName;
313 do {
314
315 // if totalSize > 0, then we've already scanned the folder and know it's too big,
316 // so delete oldest file
317 if (totalSize > 0 && !wxRemoveFile(oldestFileName))
318 WARNINGMSG("can't remove file " << oldestFileName);
319
320 // loop through files, finding oldest and calculating total size
321 totalSize = 0;
322 time_t oldestFileDate = wxDateTime::GetTimeNow(), date;
323 wxString file = wxFindFirstFile(cacheFolderFiles, wxFILE);
324 for (; file.size() > 0; file = wxFindNextFile()) {
325 date = wxFileModificationTime(file);
326 if (date < oldestFileDate) {
327 oldestFileDate = date;
328 oldestFileName = file;
329 }
330 wxFile wx_file(file, wxFile::read);
331 if (wx_file.IsOpened()) {
332 totalSize += wx_file.Length();
333 wx_file.Close();
334 } else
335 WARNINGMSG("wxFile failed to open " << file);
336 }
337 INFOMSG("total size: " << totalSize << " oldest file: " << oldestFileName.c_str());
338
339 } while (totalSize > maxSize * 1024 * 1024);
340 }
341
342 END_SCOPE(Cn3D)
343