1 /* $Id: cn3d_tools.cpp 518508 2016-11-03 18:40:18Z lanczyck $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * Miscellaneous utility functions
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #if defined(__WXMSW__)
37 #include <windows.h>
38 #include <shellapi.h> // for ShellExecute, needed to launch browser
39
40 #elif defined(__WXGTK__)
41 #include <unistd.h>
42
43 #elif defined(__WXMAC__)
44 // full paths used to avoid adding extra -I option to point at FlatCarbon to compile flags for all modules...
45 // Under OSX 10.6 and earlier, /Developer was a root-level directory. With 10.8, it is buried under XCode's tools.
46 //#include "/Developer/Headers/FlatCarbon/Types.h"
47 //#include "/Developer/Headers/FlatCarbon/InternetConfig.h"
48 #include "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/HIServices.framework/Versions/A/Headers/InternetConfig.h"
49 #endif
50
51 #include <corelib/ncbistd.hpp>
52 #include <corelib/ncbireg.hpp>
53
54 #include <objects/seq/Bioseq.hpp>
55 #include <objects/seqset/Seq_entry.hpp>
56 #include <objects/seqset/Bioseq_set.hpp>
57
58 #include "remove_header_conflicts.hpp"
59
60 #ifdef __WXMSW__
61 #include <windows.h>
62 #include <wx/msw/winundef.h>
63 #endif
64 #include <wx/wx.h>
65 #include <wx/file.h>
66 #include <wx/fileconf.h>
67
68 #include "cn3d_tools.hpp"
69 #include "asn_reader.hpp"
70
71 #include <memory>
72
73 USING_NCBI_SCOPE;
74 USING_SCOPE(objects);
75
76
77 BEGIN_SCOPE(Cn3D)
78
79 ///// Registry stuff /////
80
81 static CMemoryRegistry registry;
82 static string registryFile;
83 static bool registryChanged = false;
84
SetRegistryDefaults(void)85 static void SetRegistryDefaults(void)
86 {
87 // default log window startup
88 RegistrySetBoolean(REG_CONFIG_SECTION, REG_SHOW_LOG_ON_START, false);
89 RegistrySetString(REG_CONFIG_SECTION, REG_FAVORITES_NAME, NO_FAVORITES_FILE);
90 RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_POS_X, 50);
91 RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_POS_Y, 50);
92 RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_SIZE_W, 400);
93 RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_SIZE_H, 400);
94
95 // default animation controls
96 RegistrySetInteger(REG_ANIMATION_SECTION, REG_SPIN_DELAY, 50);
97 RegistrySetDouble(REG_ANIMATION_SECTION, REG_SPIN_INCREMENT, 2.0),
98 RegistrySetInteger(REG_ANIMATION_SECTION, REG_FRAME_DELAY, 500);
99
100 // default quality settings
101 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_ATOM_SLICES, 10);
102 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_ATOM_STACKS, 8);
103 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_BOND_SIDES, 6);
104 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_WORM_SIDES, 6);
105 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_WORM_SEGMENTS, 6);
106 RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_HELIX_SIDES, 12);
107 RegistrySetBoolean(REG_QUALITY_SECTION, REG_HIGHLIGHTS_ON, true);
108 RegistrySetString(REG_QUALITY_SECTION, REG_PROJECTION_TYPE, "Perspective");
109
110 if (IsWindowedMode()) {
111 // default font for OpenGL (structure window)
112 wxFont *font = wxFont::New(
113 #if defined(__WXMSW__)
114 12,
115 #elif defined(__WXGTK__)
116 14,
117 #elif defined(__WXMAC__)
118 14,
119 #endif
120 wxSWISS, wxNORMAL, wxBOLD, false);
121 if (font && font->Ok())
122 RegistrySetString(REG_OPENGL_FONT_SECTION, REG_FONT_NATIVE_FONT_INFO, WX_TO_STD(font->GetNativeFontInfoDesc()));
123 else
124 ERRORMSG("Can't create default structure window font");
125
126 if (font) delete font;
127
128 // default font for sequence viewers
129 font = wxFont::New(
130 #if defined(__WXMSW__)
131 10,
132 #elif defined(__WXGTK__)
133 14,
134 #elif defined(__WXMAC__)
135 12,
136 #endif
137 wxROMAN, wxNORMAL, wxNORMAL, false);
138 if (font && font->Ok())
139 RegistrySetString(REG_SEQUENCE_FONT_SECTION, REG_FONT_NATIVE_FONT_INFO, WX_TO_STD(font->GetNativeFontInfoDesc()));
140 else
141 ERRORMSG("Can't create default sequence window font");
142 if (font) delete font;
143 }
144
145 // default cache settings
146 RegistrySetBoolean(REG_CACHE_SECTION, REG_CACHE_ENABLED, true);
147 if (GetPrefsDir().size() > 0)
148 RegistrySetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, GetPrefsDir() + "cache");
149 else
150 RegistrySetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, GetProgramDir() + "cache");
151 RegistrySetInteger(REG_CACHE_SECTION, REG_CACHE_MAX_SIZE, 25);
152
153 // default advanced options
154 RegistrySetBoolean(REG_ADVANCED_SECTION, REG_CDD_ANNOT_READONLY, true);
155 #ifdef __WXGTK__
156 RegistrySetString(REG_ADVANCED_SECTION, REG_BROWSER_LAUNCH,
157 // for launching netscape in a separate window
158 "( netscape -noraise -remote 'openURL(<URL>,new-window)' || netscape '<URL>' ) >/dev/null 2>&1 &"
159 // for launching netscape in an existing window
160 // "( netscape -raise -remote 'openURL(<URL>)' || netscape '<URL>' ) >/dev/null 2>&1 &"
161 );
162 #endif
163 RegistrySetInteger(REG_ADVANCED_SECTION, REG_MAX_N_STRUCTS, 10);
164 RegistrySetInteger(REG_ADVANCED_SECTION, REG_FOOTPRINT_RES, 0);
165
166 // default stereo options
167 RegistrySetDouble(REG_ADVANCED_SECTION, REG_STEREO_SEPARATION, 5.0);
168 RegistrySetBoolean(REG_ADVANCED_SECTION, REG_PROXIMAL_STEREO, true);
169 }
170
LoadRegistry(void)171 void LoadRegistry(void)
172 {
173 // first set up defaults, then override any/all with stuff from registry file
174 SetRegistryDefaults();
175
176 if (GetPrefsDir().size() > 0)
177 registryFile = GetPrefsDir() + "Preferences";
178 else
179 registryFile = GetProgramDir() + "Preferences";
180 auto_ptr<CNcbiIfstream> iniIn(new CNcbiIfstream(registryFile.c_str(), IOS_BASE::in | IOS_BASE::binary));
181 if (*iniIn) {
182 TRACEMSG("loading program registry " << registryFile);
183 registry.Read(*iniIn, (CNcbiRegistry::ePersistent | CNcbiRegistry::eOverride));
184 }
185
186 registryChanged = false;
187 }
188
SaveRegistry(void)189 void SaveRegistry(void)
190 {
191 if (registryChanged) {
192 auto_ptr<CNcbiOfstream> iniOut(new CNcbiOfstream(registryFile.c_str(), IOS_BASE::out));
193 if (*iniOut) {
194 // TESTMSG("saving program registry " << registryFile);
195 registry.Write(*iniOut);
196 }
197 }
198 }
199
RegistryIsValidInteger(const string & section,const string & name)200 bool RegistryIsValidInteger(const string& section, const string& name)
201 {
202 long value;
203 wxString regStr = registry.Get(section, name).c_str();
204 return (regStr.size() > 0 && regStr.ToLong(&value));
205 }
206
RegistryIsValidDouble(const string & section,const string & name)207 bool RegistryIsValidDouble(const string& section, const string& name)
208 {
209 double value;
210 wxString regStr = registry.Get(section, name).c_str();
211 return (regStr.size() > 0 && regStr.ToDouble(&value));
212 }
213
RegistryIsValidBoolean(const string & section,const string & name)214 bool RegistryIsValidBoolean(const string& section, const string& name)
215 {
216 string regStr = registry.Get(section, name);
217 return (regStr.size() > 0 && (
218 toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'F' ||
219 toupper((unsigned char) regStr[0]) == 'Y' || toupper((unsigned char) regStr[0]) == 'N'));
220 }
221
RegistryIsValidString(const string & section,const string & name)222 bool RegistryIsValidString(const string& section, const string& name)
223 {
224 string regStr = registry.Get(section, name);
225 return (regStr.size() > 0);
226 }
227
RegistryGetInteger(const string & section,const string & name,int * value)228 bool RegistryGetInteger(const string& section, const string& name, int *value)
229 {
230 wxString regStr = registry.Get(section, name).c_str();
231 long l;
232 if (regStr.size() == 0 || !regStr.ToLong(&l)) {
233 WARNINGMSG("Can't get long from registry: " << section << ", " << name);
234 return false;
235 }
236 *value = (int) l;
237 return true;
238 }
239
RegistryGetDouble(const string & section,const string & name,double * value)240 bool RegistryGetDouble(const string& section, const string& name, double *value)
241 {
242 wxString regStr = registry.Get(section, name).c_str();
243 if (regStr.size() == 0 || !regStr.ToDouble(value)) {
244 WARNINGMSG("Can't get double from registry: " << section << ", " << name);
245 return false;
246 }
247 return true;
248 }
249
RegistryGetBoolean(const string & section,const string & name,bool * value)250 bool RegistryGetBoolean(const string& section, const string& name, bool *value)
251 {
252 string regStr = registry.Get(section, name);
253 if (regStr.size() == 0 || !(
254 toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'F' ||
255 toupper((unsigned char) regStr[0]) == 'Y' || toupper((unsigned char) regStr[0]) == 'N')) {
256 WARNINGMSG("Can't get boolean from registry: " << section << ", " << name);
257 return false;
258 }
259 *value = (toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'Y');
260 return true;
261 }
262
RegistryGetString(const string & section,const string & name,string * value)263 bool RegistryGetString(const string& section, const string& name, string *value)
264 {
265 string regStr = registry.Get(section, name);
266 if (regStr.size() == 0) {
267 WARNINGMSG("Can't get string from registry: " << section << ", " << name);
268 return false;
269 }
270 *value = regStr;
271 return true;
272 }
273
RegistrySetInteger(const string & section,const string & name,int value)274 bool RegistrySetInteger(const string& section, const string& name, int value)
275 {
276 bool okay = registry.Set(section, name, NStr::IntToString(value), CNcbiRegistry::ePersistent);
277 if (!okay)
278 ERRORMSG("registry Set(" << section << ", " << name << ") failed");
279 else
280 registryChanged = true;
281 return okay;
282 }
283
RegistrySetDouble(const string & section,const string & name,double value)284 bool RegistrySetDouble(const string& section, const string& name, double value)
285 {
286 bool okay = registry.Set(section, name, NStr::DoubleToString(value), CNcbiRegistry::ePersistent);
287 if (!okay)
288 ERRORMSG("registry Set(" << section << ", " << name << ") failed");
289 else
290 registryChanged = true;
291 return okay;
292 }
293
RegistrySetBoolean(const string & section,const string & name,bool value,bool useYesOrNo)294 bool RegistrySetBoolean(const string& section, const string& name, bool value, bool useYesOrNo)
295 {
296 string regStr;
297 if (useYesOrNo)
298 regStr = value ? "yes" : "no";
299 else
300 regStr = value ? "true" : "false";
301 bool okay = registry.Set(section, name, regStr, CNcbiRegistry::ePersistent);
302 if (!okay)
303 ERRORMSG("registry Set(" << section << ", " << name << ") failed");
304 else
305 registryChanged = true;
306 return okay;
307 }
308
RegistrySetString(const string & section,const string & name,const string & value)309 bool RegistrySetString(const string& section, const string& name, const string& value)
310 {
311 bool okay = registry.Set(section, name, value, CNcbiRegistry::ePersistent);
312 if (!okay)
313 ERRORMSG("registry Set(" << section << ", " << name << ") failed");
314 else
315 registryChanged = true;
316 return okay;
317 }
318
319
320 ///// Misc stuff /////
321
// File-scope directory strings, filled in by SetUpWorkingDirectories(); each one
// that gets set ends with a trailing path separator character.
static string workingDir;   // current working directory
static string programDir;   // directory where Cn3D executable lives
static string dataDir;      // 'data' directory with external data files
static string prefsDir;     // application preferences directory

// Read-only accessors; each returns a reference to the corresponding string above.
const string& GetWorkingDir(void)
{
    return workingDir;
}

const string& GetProgramDir(void)
{
    return programDir;
}

const string& GetDataDir(void)
{
    return dataDir;
}

const string& GetPrefsDir(void)
{
    return prefsDir;
}
332
SetUpWorkingDirectories(const char * argv0)333 void SetUpWorkingDirectories(const char* argv0)
334 {
335 // set up working directories
336 workingDir = wxGetCwd().c_str();
337 #ifdef __WXGTK__
338 if (getenv("CN3D_HOME") != NULL)
339 programDir = getenv("CN3D_HOME");
340 else
341 #endif
342 if (wxIsAbsolutePath(argv0))
343 programDir = wxPathOnly(argv0).c_str();
344 else if (wxPathOnly(argv0) == "")
345 programDir = workingDir;
346 else
347 programDir = workingDir + wxFILE_SEP_PATH + WX_TO_STD(wxPathOnly(argv0));
348 workingDir = workingDir + wxFILE_SEP_PATH;
349 programDir = programDir + wxFILE_SEP_PATH;
350
351 // find or create preferences folder
352 wxString localDir;
353 wxFileName::SplitPath(wxFileConfig::GetLocalFileName("unused"), &localDir, NULL, NULL);
354 wxString prefsDirLocal = localDir + wxFILE_SEP_PATH + "Cn3D_User";
355 wxString prefsDirProg = wxString(programDir.c_str()) + wxFILE_SEP_PATH + "Cn3D_User";
356 if (wxDirExists(prefsDirLocal))
357 prefsDir = prefsDirLocal.c_str();
358 else if (wxDirExists(prefsDirProg))
359 prefsDir = prefsDirProg.c_str();
360 else {
361 // try to create the folder
362 if (wxMkdir(prefsDirLocal) && wxDirExists(prefsDirLocal))
363 prefsDir = prefsDirLocal.c_str();
364 else if (wxMkdir(prefsDirProg) && wxDirExists(prefsDirProg))
365 prefsDir = prefsDirProg.c_str();
366 }
367 if (prefsDir.size() == 0)
368 WARNINGMSG("Can't create Cn3D_User folder at either:"
369 << "\n " << prefsDirLocal
370 << "\nor " << prefsDirProg);
371 else
372 prefsDir += wxFILE_SEP_PATH;
373
374 // set data dir, and register the path in C toolkit registry (mainly for BLAST code)
375 #ifdef __WXMAC__
376 dataDir = programDir + "../Resources/data/";
377 #else
378 dataDir = programDir + "data" + wxFILE_SEP_PATH;
379 #endif
380
381 TRACEMSG("working dir: " << workingDir.c_str());
382 TRACEMSG("program dir: " << programDir.c_str());
383 TRACEMSG("data dir: " << dataDir.c_str());
384 TRACEMSG("prefs dir: " << prefsDir.c_str());
385 }
386
387 #ifdef __WXMSW__
388 // code borrowed (and modified) from Nlm_MSWin_OpenDocument() in vibutils.c
MSWin_OpenDocument(const char * doc_name)389 static bool MSWin_OpenDocument(const char* doc_name)
390 {
391 int status = (int) ShellExecute(0, "open", doc_name, NULL, NULL, SW_SHOWNORMAL);
392 if (status <= 32) {
393 ERRORMSG("Unable to open document \"" << doc_name << "\", error = " << status);
394 return false;
395 }
396 return true;
397 }
398 #endif
399
400 #ifdef __WXMAC__
401 // CJL Hack ... pass the length of the string
MacLaunchURL(ConstStr255Param urlStr,long int len)402 static OSStatus MacLaunchURL(ConstStr255Param urlStr, long int len)
403 {
404 OSStatus err;
405 ICInstance inst;
406 long int startSel;
407 long int endSel;
408
409 err = ICStart(&inst, 'Cn3D');
410 if (err == noErr) {
411 #if !TARGET_CARBON
412 err = ICFindConfigFile(inst, 0, nil);
413 #endif
414 if (err == noErr) {
415 startSel = 0;
416 // endSel = strlen(urlStr); // OSX didn't like this: invalid conversion from
417 // 'const unsigned char*' to 'const char*' compiler error.
418 // ConstStr255Param is an unsigned char*. Mac developer docs do not seem to indicate the '255'
419 // means there are any length restrictions on such strings, and that implementations have some
420 // backing store for longer strings. But to be safe, I'm truncating this to 255.
421 // As used in Cn3D none of the URLs are terribly long ... except when multiple annotations are selected.
422 // (Also see CoreFoundation header CFBase.h; used in ncbi_os_mac.hpp Pstrncpy)
423 endSel = (len > 0 && len <= 255) ? len : 255;
424 err = ICLaunchURL(inst, "\p", urlStr, endSel, &startSel, &endSel);
425 }
426 ICStop(inst);
427 }
428 return err;
429 }
430 #endif
431
LaunchWebPage(const char * url)432 void LaunchWebPage(const char *url)
433 {
434 if(!url) return;
435 INFOMSG("launching url " << url);
436
437 #if defined(__WXMSW__)
438 if (!MSWin_OpenDocument(url)) {
439 ERRORMSG("Unable to launch browser");
440 }
441
442 #elif defined(__WXGTK__)
443 string command;
444 RegistryGetString(REG_ADVANCED_SECTION, REG_BROWSER_LAUNCH, &command);
445 size_t pos = 0;
446 while ((pos=command.find("<URL>", pos)) != string::npos)
447 command.replace(pos, 5, url);
448 TRACEMSG("launching browser with: " << command);
449 system(command.c_str());
450
451 #elif defined(__WXMAC__)
452 // CJL: hack of dubious generality to get the string length
453 // of a 'ConstStr255Param' type.
454 // Unclear if strings longer than 255 characters are safe. See notes above in MacLaunchURL.
455 unsigned int i = 0, l = strlen(url);
456 unsigned char uc_url[l+1];
457 for (; i < l && i < 255; ++i) uc_url[i] = (unsigned char) *(url + i);
458 uc_url[i] = '\0';
459 MacLaunchURL(uc_url, l);
460 #endif
461 }
462
FetchSequenceViaHTTP(const string & id)463 CRef < CBioseq > FetchSequenceViaHTTP(const string& id)
464 {
465 CSeq_entry seqEntry;
466 string err;
467 static const string host("eutils.ncbi.nlm.nih.gov"), path("/entrez/eutils/efetch.fcgi");
468 string args = string("rettype=asn.1&retmode=binary&maxplex=1&id=") + id;
469
470 // efetch doesn't seem to care whether db is protein or nucleotide, when using gi or accession... but that may change in the future
471 CRef < CBioseq > bioseq;
472 for (unsigned int round=1; round<=2 && bioseq.Empty(); ++round) {
473 string db = (round == 1) ? "protein" : "nucleotide";
474 INFOMSG("Trying to load sequence from URL " << host << path << '?' << (args + "&db=" + db));
475
476 bool ok = GetAsnDataViaHTTPS(host, path, (args + "&db=" + db), &seqEntry, &err);
477 if (ok) {
478 if (seqEntry.IsSeq())
479 bioseq.Reset(&(seqEntry.SetSeq()));
480 else if (seqEntry.IsSet() && seqEntry.GetSet().GetSeq_set().front()->IsSeq())
481 bioseq.Reset(&(seqEntry.SetSet().SetSeq_set().front()->SetSeq()));
482 else
483 WARNINGMSG("FetchSequenceViaHTTP() - confused by SeqEntry format");
484 } else {
485 WARNINGMSG("FetchSequenceViaHTTP() - HTTP Bioseq retrieval failed, err: " << err);
486 }
487 }
488 return bioseq;
489 }
490
// Residue characters in NCBIStdaa order: the index of a character in this
// string is its NCBIStdaa residue number.
static const string NCBIStdaaResidues("-ABCDEFGHIKLMNPQRSTVWXYZU*OJ");

// gives NCBIStdaa residue number for a character (or value for 'X' if char not found);
// the lookup is case-insensitive
unsigned char LookupNCBIStdaaNumberFromCharacter(char r)
{
    const char upper = (char) toupper((unsigned char) r);

    string::size_type index = NCBIStdaaResidues.find(upper);
    if (index == string::npos)
        index = NCBIStdaaResidues.find('X');

    return (unsigned char) index;
}
510
LookupCharacterFromNCBIStdaaNumber(unsigned char n)511 char LookupCharacterFromNCBIStdaaNumber(unsigned char n)
512 {
513 if (n <= 27)
514 return NCBIStdaaResidues[n];
515 ERRORMSG("LookupCharacterFromNCBIStdaaNumber() - valid values are 0 - 27");
516 return '?';
517 }
518
Prosite2Regex(const string & prosite,string * regex,int * nGroups)519 bool Prosite2Regex(const string& prosite, string *regex, int *nGroups)
520 {
521 try {
522 // check allowed characters ('#' isn't ProSite, but is a special case used to match an 'X' residue character)
523 static const string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>.#";
524 unsigned int i;
525 for (i=0; i<prosite.size(); ++i)
526 if (allowed.find(toupper((unsigned char) prosite[i])) == string::npos) break;
527 if (i != prosite.size()) throw "invalid ProSite character";
528 if (prosite[prosite.size() - 1] != '.') throw "ProSite pattern must end with '.'";
529
530 // translate into real regex syntax;
531 regex->erase();
532 *nGroups = 0;
533
534 bool inGroup = false;
535 for (unsigned int i=0; i<prosite.size(); ++i) {
536
537 // handle grouping and termini
538 bool characterHandled = true;
539 switch (prosite[i]) {
540 case '-': case '.': case '>':
541 if (inGroup) {
542 *regex += ')';
543 inGroup = false;
544 }
545 if (prosite[i] == '>') *regex += '$';
546 break;
547 case '<':
548 *regex += '^';
549 break;
550 default:
551 characterHandled = false;
552 break;
553 }
554 if (characterHandled) continue;
555 if (!inGroup && (
556 (isalpha((unsigned char) prosite[i]) && toupper((unsigned char) prosite[i]) != 'X') ||
557 prosite[i] == '[' || prosite[i] == '{' || prosite[i] == '#')) {
558 *regex += '(';
559 ++(*nGroups);
560 inGroup = true;
561 }
562
563 // translate syntax
564 switch (prosite[i]) {
565 case '(':
566 *regex += '{';
567 break;
568 case ')':
569 *regex += '}';
570 break;
571 case '{':
572 *regex += "[^";
573 break;
574 case '}':
575 *regex += ']';
576 break;
577 case 'X': case 'x':
578 *regex += '.';
579 break;
580 case '#':
581 *regex += 'X';
582 break;
583 default:
584 *regex += toupper((unsigned char) prosite[i]);
585 break;
586 }
587 }
588 }
589
590 catch (const char *err) {
591 ERRORMSG("Prosite2Regex() - " << err);
592 return false;
593 }
594
595 return true;
596 }
597
PrositePatternLength(const string & prosite)598 unsigned int PrositePatternLength(const string& prosite)
599 {
600 // ('#' isn't ProSite, but is a special case used to match an 'X' residue character)
601 static const string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>.#";
602
603 // check allowed characters
604 unsigned int i;
605 for (i=0; i<prosite.size(); ++i)
606 if (allowed.find(toupper((unsigned char) prosite[i])) == string::npos) break;
607 if (i != prosite.size()) return 0;
608
609 bool hasOnlyX = true, stopParsing = false;
610 bool inBraces = false, inBrackets = false, inParens = false;
611 unsigned int length = 0;
612 int nFromParens;
613 string betweenParens;
614
615 for (i=0; i < prosite.size() && !stopParsing; ++i) {
616
617 // handle grouping and termini
618 bool characterHandled = true;
619 switch (prosite[i]) {
620 case '-': case '.': case '>': case '<':
621 break;
622 default:
623 characterHandled = false;
624 break;
625 }
626 if (inParens && prosite[i] != ')' && !characterHandled) betweenParens += prosite[i];
627 if (characterHandled) continue;
628
629 if (hasOnlyX && isalpha((unsigned char) prosite[i]) && toupper((unsigned char) prosite[i]) != 'X') {
630 hasOnlyX = false;
631 }
632
633 // translate syntax
634 switch (prosite[i]) {
635 case '(':
636 inParens = true;
637 break;
638 case ')':
639 nFromParens = NStr::StringToInt(betweenParens, NStr::fConvErr_NoThrow);
640
641 // Do not allow a variable number of repetitions.
642 // Also, length has already been incremented by 1 for whatever the (...) references
643 if (nFromParens > 0)
644 length += nFromParens - 1;
645 else
646 stopParsing = true;
647
648 inParens = false;
649 betweenParens.erase();
650 break;
651 case '{':
652 inBraces = true;
653 break;
654 case '}':
655 ++length;
656 inBraces = false;
657 break;
658 case '[':
659 inBrackets = true;
660 break;
661 case ']':
662 ++length;
663 inBrackets = false;
664 break;
665 default:
666 if (!inParens && !inBraces && !inBrackets) ++length;
667 break;
668 }
669 }
670
671 // Invalid pattern: Appear to have missed a closing parenthesis/brace/bracket.
672 if (inParens || inBrackets || inBraces) length = 0;
673
674 // Invalid pattern: Appear to have all 'X' characters.
675 if (hasOnlyX) length = 0;
676
677 // If there was some parsing error or prosite pattern allowed
678 // a match of indeterminate length, return 0.
679 if (stopParsing) length = 0;
680
681 return length;
682 }
683
684 END_SCOPE(Cn3D)
685