1 /*  $Id: cn3d_tools.cpp 518508 2016-11-03 18:40:18Z lanczyck $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Miscellaneous utility functions
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #if defined(__WXMSW__)
37 #include <windows.h>
38 #include <shellapi.h>   // for ShellExecute, needed to launch browser
39 
40 #elif defined(__WXGTK__)
41 #include <unistd.h>
42 
43 #elif defined(__WXMAC__)
44 // full paths used to avoid adding extra -I option to point at FlatCarbon to compile flags for all modules...
45 // Under OSX 10.6 and earlier, /Developer was a root-level directory.  With 10.8, it is buried under XCode's tools.
46 //#include "/Developer/Headers/FlatCarbon/Types.h"
47 //#include "/Developer/Headers/FlatCarbon/InternetConfig.h"
48 #include "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/HIServices.framework/Versions/A/Headers/InternetConfig.h"
49 #endif
50 
51 #include <corelib/ncbistd.hpp>
52 #include <corelib/ncbireg.hpp>
53 
54 #include <objects/seq/Bioseq.hpp>
55 #include <objects/seqset/Seq_entry.hpp>
56 #include <objects/seqset/Bioseq_set.hpp>
57 
58 #include "remove_header_conflicts.hpp"
59 
60 #ifdef __WXMSW__
61 #include <windows.h>
62 #include <wx/msw/winundef.h>
63 #endif
64 #include <wx/wx.h>
65 #include <wx/file.h>
66 #include <wx/fileconf.h>
67 
68 #include "cn3d_tools.hpp"
69 #include "asn_reader.hpp"
70 
71 #include <memory>
72 
73 USING_NCBI_SCOPE;
74 USING_SCOPE(objects);
75 
76 
77 BEGIN_SCOPE(Cn3D)
78 
79 ///// Registry stuff /////
80 
81 static CMemoryRegistry registry;
82 static string registryFile;
83 static bool registryChanged = false;
84 
SetRegistryDefaults(void)85 static void SetRegistryDefaults(void)
86 {
87     // default log window startup
88     RegistrySetBoolean(REG_CONFIG_SECTION, REG_SHOW_LOG_ON_START, false);
89     RegistrySetString(REG_CONFIG_SECTION, REG_FAVORITES_NAME, NO_FAVORITES_FILE);
90     RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_POS_X, 50);
91     RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_POS_Y, 50);
92     RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_SIZE_W, 400);
93     RegistrySetInteger(REG_CONFIG_SECTION, REG_MT_DIALOG_SIZE_H, 400);
94 
95     // default animation controls
96     RegistrySetInteger(REG_ANIMATION_SECTION, REG_SPIN_DELAY, 50);
97     RegistrySetDouble(REG_ANIMATION_SECTION, REG_SPIN_INCREMENT, 2.0),
98     RegistrySetInteger(REG_ANIMATION_SECTION, REG_FRAME_DELAY, 500);
99 
100     // default quality settings
101     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_ATOM_SLICES, 10);
102     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_ATOM_STACKS, 8);
103     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_BOND_SIDES, 6);
104     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_WORM_SIDES, 6);
105     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_WORM_SEGMENTS, 6);
106     RegistrySetInteger(REG_QUALITY_SECTION, REG_QUALITY_HELIX_SIDES, 12);
107     RegistrySetBoolean(REG_QUALITY_SECTION, REG_HIGHLIGHTS_ON, true);
108     RegistrySetString(REG_QUALITY_SECTION, REG_PROJECTION_TYPE, "Perspective");
109 
110     if (IsWindowedMode()) {
111         // default font for OpenGL (structure window)
112         wxFont *font = wxFont::New(
113 #if defined(__WXMSW__)
114             12,
115 #elif defined(__WXGTK__)
116             14,
117 #elif defined(__WXMAC__)
118             14,
119 #endif
120             wxSWISS, wxNORMAL, wxBOLD, false);
121         if (font && font->Ok())
122             RegistrySetString(REG_OPENGL_FONT_SECTION, REG_FONT_NATIVE_FONT_INFO, WX_TO_STD(font->GetNativeFontInfoDesc()));
123         else
124             ERRORMSG("Can't create default structure window font");
125 
126         if (font) delete font;
127 
128         // default font for sequence viewers
129         font = wxFont::New(
130 #if defined(__WXMSW__)
131             10,
132 #elif defined(__WXGTK__)
133             14,
134 #elif defined(__WXMAC__)
135             12,
136 #endif
137             wxROMAN, wxNORMAL, wxNORMAL, false);
138         if (font && font->Ok())
139             RegistrySetString(REG_SEQUENCE_FONT_SECTION, REG_FONT_NATIVE_FONT_INFO, WX_TO_STD(font->GetNativeFontInfoDesc()));
140         else
141             ERRORMSG("Can't create default sequence window font");
142         if (font) delete font;
143     }
144 
145     // default cache settings
146     RegistrySetBoolean(REG_CACHE_SECTION, REG_CACHE_ENABLED, true);
147     if (GetPrefsDir().size() > 0)
148         RegistrySetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, GetPrefsDir() + "cache");
149     else
150         RegistrySetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, GetProgramDir() + "cache");
151     RegistrySetInteger(REG_CACHE_SECTION, REG_CACHE_MAX_SIZE, 25);
152 
153     // default advanced options
154     RegistrySetBoolean(REG_ADVANCED_SECTION, REG_CDD_ANNOT_READONLY, true);
155 #ifdef __WXGTK__
156     RegistrySetString(REG_ADVANCED_SECTION, REG_BROWSER_LAUNCH,
157         // for launching netscape in a separate window
158         "( netscape -noraise -remote 'openURL(<URL>,new-window)' || netscape '<URL>' ) >/dev/null 2>&1 &"
159         // for launching netscape in an existing window
160 //        "( netscape -raise -remote 'openURL(<URL>)' || netscape '<URL>' ) >/dev/null 2>&1 &"
161     );
162 #endif
163     RegistrySetInteger(REG_ADVANCED_SECTION, REG_MAX_N_STRUCTS, 10);
164     RegistrySetInteger(REG_ADVANCED_SECTION, REG_FOOTPRINT_RES, 0);
165 
166     // default stereo options
167     RegistrySetDouble(REG_ADVANCED_SECTION, REG_STEREO_SEPARATION, 5.0);
168     RegistrySetBoolean(REG_ADVANCED_SECTION, REG_PROXIMAL_STEREO, true);
169 }
170 
LoadRegistry(void)171 void LoadRegistry(void)
172 {
173     // first set up defaults, then override any/all with stuff from registry file
174     SetRegistryDefaults();
175 
176     if (GetPrefsDir().size() > 0)
177         registryFile = GetPrefsDir() + "Preferences";
178     else
179         registryFile = GetProgramDir() + "Preferences";
180     auto_ptr<CNcbiIfstream> iniIn(new CNcbiIfstream(registryFile.c_str(), IOS_BASE::in | IOS_BASE::binary));
181     if (*iniIn) {
182         TRACEMSG("loading program registry " << registryFile);
183         registry.Read(*iniIn, (CNcbiRegistry::ePersistent | CNcbiRegistry::eOverride));
184     }
185 
186     registryChanged = false;
187 }
188 
SaveRegistry(void)189 void SaveRegistry(void)
190 {
191     if (registryChanged) {
192         auto_ptr<CNcbiOfstream> iniOut(new CNcbiOfstream(registryFile.c_str(), IOS_BASE::out));
193         if (*iniOut) {
194 //            TESTMSG("saving program registry " << registryFile);
195             registry.Write(*iniOut);
196         }
197     }
198 }
199 
RegistryIsValidInteger(const string & section,const string & name)200 bool RegistryIsValidInteger(const string& section, const string& name)
201 {
202     long value;
203     wxString regStr = registry.Get(section, name).c_str();
204     return (regStr.size() > 0 && regStr.ToLong(&value));
205 }
206 
RegistryIsValidDouble(const string & section,const string & name)207 bool RegistryIsValidDouble(const string& section, const string& name)
208 {
209     double value;
210     wxString regStr = registry.Get(section, name).c_str();
211     return (regStr.size() > 0 && regStr.ToDouble(&value));
212 }
213 
RegistryIsValidBoolean(const string & section,const string & name)214 bool RegistryIsValidBoolean(const string& section, const string& name)
215 {
216     string regStr = registry.Get(section, name);
217     return (regStr.size() > 0 && (
218         toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'F' ||
219         toupper((unsigned char) regStr[0]) == 'Y' || toupper((unsigned char) regStr[0]) == 'N'));
220 }
221 
RegistryIsValidString(const string & section,const string & name)222 bool RegistryIsValidString(const string& section, const string& name)
223 {
224     string regStr = registry.Get(section, name);
225     return (regStr.size() > 0);
226 }
227 
RegistryGetInteger(const string & section,const string & name,int * value)228 bool RegistryGetInteger(const string& section, const string& name, int *value)
229 {
230     wxString regStr = registry.Get(section, name).c_str();
231     long l;
232     if (regStr.size() == 0 || !regStr.ToLong(&l)) {
233         WARNINGMSG("Can't get long from registry: " << section << ", " << name);
234         return false;
235     }
236     *value = (int) l;
237     return true;
238 }
239 
RegistryGetDouble(const string & section,const string & name,double * value)240 bool RegistryGetDouble(const string& section, const string& name, double *value)
241 {
242     wxString regStr = registry.Get(section, name).c_str();
243     if (regStr.size() == 0 || !regStr.ToDouble(value)) {
244         WARNINGMSG("Can't get double from registry: " << section << ", " << name);
245         return false;
246     }
247     return true;
248 }
249 
RegistryGetBoolean(const string & section,const string & name,bool * value)250 bool RegistryGetBoolean(const string& section, const string& name, bool *value)
251 {
252     string regStr = registry.Get(section, name);
253     if (regStr.size() == 0 || !(
254             toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'F' ||
255             toupper((unsigned char) regStr[0]) == 'Y' || toupper((unsigned char) regStr[0]) == 'N')) {
256         WARNINGMSG("Can't get boolean from registry: " << section << ", " << name);
257         return false;
258     }
259     *value = (toupper((unsigned char) regStr[0]) == 'T' || toupper((unsigned char) regStr[0]) == 'Y');
260     return true;
261 }
262 
RegistryGetString(const string & section,const string & name,string * value)263 bool RegistryGetString(const string& section, const string& name, string *value)
264 {
265     string regStr = registry.Get(section, name);
266     if (regStr.size() == 0) {
267         WARNINGMSG("Can't get string from registry: " << section << ", " << name);
268         return false;
269     }
270     *value = regStr;
271     return true;
272 }
273 
RegistrySetInteger(const string & section,const string & name,int value)274 bool RegistrySetInteger(const string& section, const string& name, int value)
275 {
276     bool okay = registry.Set(section, name, NStr::IntToString(value), CNcbiRegistry::ePersistent);
277     if (!okay)
278         ERRORMSG("registry Set(" << section << ", " << name << ") failed");
279     else
280         registryChanged = true;
281     return okay;
282 }
283 
RegistrySetDouble(const string & section,const string & name,double value)284 bool RegistrySetDouble(const string& section, const string& name, double value)
285 {
286     bool okay = registry.Set(section, name, NStr::DoubleToString(value), CNcbiRegistry::ePersistent);
287     if (!okay)
288         ERRORMSG("registry Set(" << section << ", " << name << ") failed");
289     else
290         registryChanged = true;
291     return okay;
292 }
293 
RegistrySetBoolean(const string & section,const string & name,bool value,bool useYesOrNo)294 bool RegistrySetBoolean(const string& section, const string& name, bool value, bool useYesOrNo)
295 {
296     string regStr;
297     if (useYesOrNo)
298         regStr = value ? "yes" : "no";
299     else
300         regStr = value ? "true" : "false";
301     bool okay = registry.Set(section, name, regStr, CNcbiRegistry::ePersistent);
302     if (!okay)
303         ERRORMSG("registry Set(" << section << ", " << name << ") failed");
304     else
305         registryChanged = true;
306     return okay;
307 }
308 
RegistrySetString(const string & section,const string & name,const string & value)309 bool RegistrySetString(const string& section, const string& name, const string& value)
310 {
311     bool okay = registry.Set(section, name, value, CNcbiRegistry::ePersistent);
312     if (!okay)
313         ERRORMSG("registry Set(" << section << ", " << name << ") failed");
314     else
315         registryChanged = true;
316     return okay;
317 }
318 
319 
320 ///// Misc stuff /////
321 
322 // global strings for various directories - will include trailing path separator character
323 static string
324     workingDir,     // current working directory
325     programDir,     // directory where Cn3D executable lives
326     dataDir,        // 'data' directory with external data files
327     prefsDir;       // application preferences directory
GetWorkingDir(void)328 const string& GetWorkingDir(void) { return workingDir; }
GetProgramDir(void)329 const string& GetProgramDir(void) { return programDir; }
GetDataDir(void)330 const string& GetDataDir(void) { return dataDir; }
GetPrefsDir(void)331 const string& GetPrefsDir(void) { return prefsDir; }
332 
SetUpWorkingDirectories(const char * argv0)333 void SetUpWorkingDirectories(const char* argv0)
334 {
335     // set up working directories
336     workingDir = wxGetCwd().c_str();
337 #ifdef __WXGTK__
338     if (getenv("CN3D_HOME") != NULL)
339         programDir = getenv("CN3D_HOME");
340     else
341 #endif
342     if (wxIsAbsolutePath(argv0))
343         programDir = wxPathOnly(argv0).c_str();
344     else if (wxPathOnly(argv0) == "")
345         programDir = workingDir;
346     else
347         programDir = workingDir + wxFILE_SEP_PATH + WX_TO_STD(wxPathOnly(argv0));
348     workingDir = workingDir + wxFILE_SEP_PATH;
349     programDir = programDir + wxFILE_SEP_PATH;
350 
351     // find or create preferences folder
352     wxString localDir;
353     wxFileName::SplitPath(wxFileConfig::GetLocalFileName("unused"), &localDir, NULL, NULL);
354     wxString prefsDirLocal = localDir + wxFILE_SEP_PATH + "Cn3D_User";
355     wxString prefsDirProg = wxString(programDir.c_str()) + wxFILE_SEP_PATH + "Cn3D_User";
356     if (wxDirExists(prefsDirLocal))
357         prefsDir = prefsDirLocal.c_str();
358     else if (wxDirExists(prefsDirProg))
359         prefsDir = prefsDirProg.c_str();
360     else {
361         // try to create the folder
362         if (wxMkdir(prefsDirLocal) && wxDirExists(prefsDirLocal))
363             prefsDir = prefsDirLocal.c_str();
364         else if (wxMkdir(prefsDirProg) && wxDirExists(prefsDirProg))
365             prefsDir = prefsDirProg.c_str();
366     }
367     if (prefsDir.size() == 0)
368         WARNINGMSG("Can't create Cn3D_User folder at either:"
369             << "\n    " << prefsDirLocal
370             << "\nor  " << prefsDirProg);
371     else
372         prefsDir += wxFILE_SEP_PATH;
373 
374     // set data dir, and register the path in C toolkit registry (mainly for BLAST code)
375 #ifdef __WXMAC__
376     dataDir = programDir + "../Resources/data/";
377 #else
378     dataDir = programDir + "data" + wxFILE_SEP_PATH;
379 #endif
380 
381     TRACEMSG("working dir: " << workingDir.c_str());
382     TRACEMSG("program dir: " << programDir.c_str());
383     TRACEMSG("data dir: " << dataDir.c_str());
384     TRACEMSG("prefs dir: " << prefsDir.c_str());
385 }
386 
387 #ifdef __WXMSW__
388 // code borrowed (and modified) from Nlm_MSWin_OpenDocument() in vibutils.c
MSWin_OpenDocument(const char * doc_name)389 static bool MSWin_OpenDocument(const char* doc_name)
390 {
391     int status = (int) ShellExecute(0, "open", doc_name, NULL, NULL, SW_SHOWNORMAL);
392     if (status <= 32) {
393         ERRORMSG("Unable to open document \"" << doc_name << "\", error = " << status);
394         return false;
395     }
396     return true;
397 }
398 #endif
399 
400 #ifdef __WXMAC__
401 //  CJL Hack ... pass the length of the string
MacLaunchURL(ConstStr255Param urlStr,long int len)402 static OSStatus MacLaunchURL(ConstStr255Param urlStr, long int len)
403 {
404     OSStatus err;
405     ICInstance inst;
406     long int startSel;
407     long int endSel;
408 
409     err = ICStart(&inst, 'Cn3D');
410     if (err == noErr) {
411 #if !TARGET_CARBON
412         err = ICFindConfigFile(inst, 0, nil);
413 #endif
414         if (err == noErr) {
415             startSel = 0;
416 //            endSel = strlen(urlStr);   //  OSX didn't like this:  invalid conversion from
417 //                                          'const unsigned char*' to 'const char*' compiler error.
418 // ConstStr255Param is an unsigned char*.  Mac developer docs do not seem to indicate the '255'
419 // means there are any length restrictions on such strings, and that implementations have some
420 // backing store for longer strings.   But to be safe, I'm truncating this to 255.
421 // As used in Cn3D none of the URLs are terribly long ... except when multiple annotations are selected.
422 // (Also see CoreFoundation header CFBase.h; used in ncbi_os_mac.hpp Pstrncpy)
423             endSel = (len > 0 && len <= 255) ? len : 255;
424             err = ICLaunchURL(inst, "\p", urlStr, endSel, &startSel, &endSel);
425         }
426         ICStop(inst);
427     }
428     return err;
429 }
430 #endif
431 
LaunchWebPage(const char * url)432 void LaunchWebPage(const char *url)
433 {
434     if(!url) return;
435     INFOMSG("launching url " << url);
436 
437 #if defined(__WXMSW__)
438     if (!MSWin_OpenDocument(url)) {
439         ERRORMSG("Unable to launch browser");
440     }
441 
442 #elif defined(__WXGTK__)
443     string command;
444     RegistryGetString(REG_ADVANCED_SECTION, REG_BROWSER_LAUNCH, &command);
445     size_t pos = 0;
446     while ((pos=command.find("<URL>", pos)) != string::npos)
447         command.replace(pos, 5, url);
448     TRACEMSG("launching browser with: " << command);
449     system(command.c_str());
450 
451 #elif defined(__WXMAC__)
452     //  CJL:  hack of dubious generality to get the string length
453     //        of a 'ConstStr255Param' type.
454     //        Unclear if strings longer than 255 characters are safe.  See notes above in MacLaunchURL.
455     unsigned int i = 0, l = strlen(url);
456     unsigned char uc_url[l+1];
457     for (; i < l && i < 255; ++i)  uc_url[i] = (unsigned char) *(url + i);
458     uc_url[i] = '\0';
459     MacLaunchURL(uc_url, l);
460 #endif
461 }
462 
FetchSequenceViaHTTP(const string & id)463 CRef < CBioseq > FetchSequenceViaHTTP(const string& id)
464 {
465     CSeq_entry seqEntry;
466     string err;
467     static const string host("eutils.ncbi.nlm.nih.gov"), path("/entrez/eutils/efetch.fcgi");
468     string args = string("rettype=asn.1&retmode=binary&maxplex=1&id=") + id;
469 
470     // efetch doesn't seem to care whether db is protein or nucleotide, when using gi or accession... but that may change in the future
471     CRef < CBioseq > bioseq;
472     for (unsigned int round=1; round<=2 && bioseq.Empty(); ++round) {
473         string db = (round == 1) ? "protein" : "nucleotide";
474         INFOMSG("Trying to load sequence from URL " << host << path << '?' << (args + "&db=" + db));
475 
476         bool ok = GetAsnDataViaHTTPS(host, path, (args + "&db=" + db), &seqEntry, &err);
477         if (ok) {
478             if (seqEntry.IsSeq())
479                 bioseq.Reset(&(seqEntry.SetSeq()));
480             else if (seqEntry.IsSet() && seqEntry.GetSet().GetSeq_set().front()->IsSeq())
481                 bioseq.Reset(&(seqEntry.SetSet().SetSeq_set().front()->SetSeq()));
482             else
483                 WARNINGMSG("FetchSequenceViaHTTP() - confused by SeqEntry format");
484         } else {
485             WARNINGMSG("FetchSequenceViaHTTP() - HTTP Bioseq retrieval failed, err: " << err);
486         }
487     }
488     return bioseq;
489 }
490 
// Residue characters of the NCBIStdaa alphabet; a character's position in this string
// is its NCBIStdaa residue number (0 - 27).
static const string NCBIStdaaResidues("-ABCDEFGHIKLMNPQRSTVWXYZU*OJ");

// gives NCBIStdaa residue number for a character (or value for 'X' if char not found)
//
// The lookup is case-insensitive. It searches the constant residue string directly, so
// there is no lazily built table that concurrent first calls could race on.
unsigned char LookupNCBIStdaaNumberFromCharacter(char r)
{
    const char residue = (char) toupper((unsigned char) r);

    string::size_type index = NCBIStdaaResidues.find(residue);
    if (index == string::npos)
        index = NCBIStdaaResidues.find('X');    // unknown characters map to 'X'

    return (unsigned char) index;
}
510 
LookupCharacterFromNCBIStdaaNumber(unsigned char n)511 char LookupCharacterFromNCBIStdaaNumber(unsigned char n)
512 {
513     if (n <= 27)
514         return NCBIStdaaResidues[n];
515     ERRORMSG("LookupCharacterFromNCBIStdaaNumber() - valid values are 0 - 27");
516     return '?';
517 }
518 
// Translates a ProSite pattern into regular-expression syntax.
//
//   prosite  - the pattern; may only contain the characters in 'allowed' below
//              (case-insensitive) and must end with '.'
//   regex    - receives the translated expression
//   nGroups  - receives the number of parenthesized groups written to *regex
//
// Each run of pattern elements between separators ('-', '.', '>') that starts with a
// residue letter other than X, a '[', a '{' or a '#' is wrapped in one group.
// Returns false, after logging the reason, if the pattern is empty, contains a
// disallowed character or lacks the final '.'; *regex and *nGroups are then left unmodified.
bool Prosite2Regex(const string& prosite, string *regex, int *nGroups)
{
    // check allowed characters ('#' isn't ProSite, but is a special case used to match an 'X' residue character)
    static const string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>.#";

    // validate everything up front, before the outputs are touched
    const char *problem = NULL;
    for (string::size_type i = 0; i < prosite.size() && !problem; ++i) {
        if (allowed.find((char) toupper((unsigned char) prosite[i])) == string::npos)
            problem = "invalid ProSite character";
    }
    // an empty pattern has no last character to inspect, and cannot end with '.'
    if (!problem && (prosite.empty() || prosite[prosite.size() - 1] != '.'))
        problem = "ProSite pattern must end with '.'";
    if (problem) {
        ERRORMSG("Prosite2Regex() - " << problem);
        return false;
    }

    // translate into real regex syntax;
    regex->erase();
    *nGroups = 0;

    bool inGroup = false;
    for (string::size_type i = 0; i < prosite.size(); ++i) {
        const char c = prosite[i];

        // separators and the C-terminal anchor close any open group
        if (c == '-' || c == '.' || c == '>') {
            if (inGroup) {
                *regex += ')';
                inGroup = false;
            }
            if (c == '>') *regex += '$';
            continue;
        }

        // N-terminal anchor
        if (c == '<') {
            *regex += '^';
            continue;
        }

        // open a group at the first element that constrains the residue
        if (!inGroup && (
                (isalpha((unsigned char) c) && toupper((unsigned char) c) != 'X') ||
                c == '[' || c == '{' || c == '#')) {
            *regex += '(';
            ++(*nGroups);
            inGroup = true;
        }

        // translate syntax
        switch (c) {
            case '(':           // repetition count: x(2) -> .{2}
                *regex += '{';
                break;
            case ')':
                *regex += '}';
                break;
            case '{':           // excluded residues: {P} -> [^P]
                *regex += "[^";
                break;
            case '}':
                *regex += ']';
                break;
            case 'X': case 'x': // any residue
                *regex += '.';
                break;
            case '#':           // literal 'X' residue character
                *regex += 'X';
                break;
            default:            // residue letters (upper-cased), digits, brackets, commas
                *regex += (char) toupper((unsigned char) c);
                break;
        }
    }

    return true;
}
597 
PrositePatternLength(const string & prosite)598 unsigned int PrositePatternLength(const string& prosite)
599 {
600     //  ('#' isn't ProSite, but is a special case used to match an 'X' residue character)
601     static const string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>.#";
602 
603     // check allowed characters
604     unsigned int i;
605     for (i=0; i<prosite.size(); ++i)
606         if (allowed.find(toupper((unsigned char) prosite[i])) == string::npos) break;
607     if (i != prosite.size()) return 0;
608 
609     bool hasOnlyX = true, stopParsing = false;
610     bool inBraces = false, inBrackets = false, inParens = false;
611     unsigned int length = 0;
612     int nFromParens;
613     string betweenParens;
614 
615     for (i=0; i < prosite.size() && !stopParsing; ++i) {
616 
617         // handle grouping and termini
618         bool characterHandled = true;
619         switch (prosite[i]) {
620             case '-': case '.': case '>': case '<':
621                 break;
622             default:
623                 characterHandled = false;
624                 break;
625         }
626         if (inParens && prosite[i] != ')' && !characterHandled) betweenParens += prosite[i];
627         if (characterHandled) continue;
628 
629         if (hasOnlyX && isalpha((unsigned char) prosite[i]) && toupper((unsigned char) prosite[i]) != 'X') {
630             hasOnlyX = false;
631         }
632 
633         // translate syntax
634         switch (prosite[i]) {
635             case '(':
636                 inParens = true;
637                 break;
638             case ')':
639                 nFromParens = NStr::StringToInt(betweenParens, NStr::fConvErr_NoThrow);
640 
641                 //  Do not allow a variable number of repetitions.
642                 //  Also, length has already been incremented by 1 for whatever the (...) references
643                 if (nFromParens > 0)
644                     length += nFromParens - 1;
645                 else
646                     stopParsing = true;
647 
648                 inParens = false;
649                 betweenParens.erase();
650                 break;
651             case '{':
652                 inBraces = true;
653                 break;
654             case '}':
655                 ++length;
656                 inBraces = false;
657                 break;
658             case '[':
659                 inBrackets = true;
660                 break;
661             case ']':
662                 ++length;
663                 inBrackets = false;
664                 break;
665             default:
666                 if (!inParens && !inBraces && !inBrackets) ++length;
667                 break;
668         }
669     }
670 
671     //  Invalid pattern:  Appear to have missed a closing parenthesis/brace/bracket.
672     if (inParens || inBrackets || inBraces) length = 0;
673 
674     //  Invalid pattern:  Appear to have all 'X' characters.
675     if (hasOnlyX) length = 0;
676 
677     //  If there was some parsing error or prosite pattern allowed
678     //  a match of indeterminate length, return 0.
679     if (stopParsing) length = 0;
680 
681     return length;
682 }
683 
684 END_SCOPE(Cn3D)
685