1 /* utilfun.h
2  *
3  * ===========================================================================
4  *
5  *                            PUBLIC DOMAIN NOTICE
6  *               National Center for Biotechnology Information
7  *
8  *  This software/database is a "United States Government Work" under the
9  *  terms of the United States Copyright Act.  It was written as part of
10  *  the author's official duties as a United States Government employee and
11  *  thus cannot be copyrighted.  This software/database is freely available
12  *  to the public for use. The National Library of Medicine and the U.S.
13  *  Government have not placed any restriction on its use or reproduction.
14  *
15  *  Although all reasonable efforts have been taken to ensure the accuracy
16  *  and reliability of the software and data, the NLM and the U.S.
17  *  Government do not and cannot warrant the performance or results that
18  *  may be obtained by using this software or data. The NLM and the U.S.
19  *  Government disclaim all warranties, express or implied, including
20  *  warranties of performance, merchantability or fitness for any particular
21  *  purpose.
22  *
23  *  Please cite the author in any work or product based on this material.
24  *
25  * ===========================================================================
26  *
27  * File Name:  utilfun.h
28  *
29  * Author: Karl Sirotkin, Hsiu-Chuan Chen
30  *
31  * File Description:
32  * -----------------
33  *      Utility functions for parser.
34  *
35  */
36 
37 #ifndef _UTILFUN_
38 #define _UTILFUN_
39 
40 /* Value used for an unknown Moltype.
41  * NOTE(review): object-like macro with a very generic name; it is expanded in every file that includes this header. */
42 #define Unknown      0
43 
44 /* Value used for an unknown keyword type.
45  */
46 #define ParFlat_UNKW 999
47 
48 #include <objects/general/Date_std.hpp>
49 #include <objects/seqblock/GB_block.hpp>
50 #include <objmgr/scope.hpp>
51 #include <objmgr/object_manager.hpp>
52 
53 #include "ftablock.h"
54 
55 BEGIN_NCBI_SCOPE
56 
57 CRef<objects::CDate_std> get_full_date(const Char* s, bool is_ref, Parser::ESource source); /* Returns a CRef to a CDate_std for the text at "s" -- presumably a parsed date; the roles of "is_ref" and "source" are not visible here (TODO confirm in the implementation). */
58 
59 /**********************************************************/
60 
61 /* Routines related to tokenizing a string. TokenString and TokenStringByDelimiter both take a mutable
62     * buffer plus a delimiter and return a TokenStatBlk pointer; how the two differ is not visible here (TODO confirm). */
63 TokenStatBlk* TokenString(char* str, Char delimiter);
64 TokenStatBlk* TokenStringByDelimiter(char* str, Char delimiter);
65 void            FreeTokenstatblk(TokenStatBlk* tsbp); /* presumably releases a TokenStatBlk obtained from the routines above -- confirm */
66 void            FreeTokenblk(TokenBlk* tbp); /* presumably releases a single TokenBlk -- confirm */
67 bool            ParseAccessionRange(TokenStatBlk* tsbp, Int4 skip); /* overload operating on a TokenStatBlk */
68 bool            ParseAccessionRange(list<string>& tokens, int skip); /* overload operating on a list of strings */
69 void            UnwrapAccessionRange(const objects::CGB_block::TExtra_accessions& extra_accs, objects::CGB_block::TExtra_accessions& hist);
70 
71 /* Returns the position in "array" of the entry that matches "text"; the comparison is over the length of
72     * the array entry (presumably a prefix match -- TODO confirm). Returns -1 if there is no match.
73     */
74 Int2            fta_StringMatch(const Char **array, const Char* text);
75 
76 /* Returns the position in "array" of the entry that matches "text", ignoring case; the comparison is
77     * over the length of the array entry (presumably a prefix match -- TODO confirm).
78     * Returns -1 if there is no match.
79     */
80 Int2            StringMatchIcase(const Char **array, const Char* text);
81 
82 char*         StringIStr(const Char* where, const Char *what); /* presumably a case-insensitive search for "what" inside "where" -- confirm against the implementation */
83 
84 /* Returns the position in "array" of the string that matches "text".
85     * Returns -1 if there is no match. MatchArrayIString is presumably the case-insensitive variant (TODO confirm).
86     */
87 Int2            MatchArrayString(const Char **array, const Char *text);
88 Int2            MatchArrayIString(const Char **array, const Char *text);
89 
90 /* Returns the position in "array" of an entry that occurs as a substring of "text".
91     * MatchArrayISubString is presumably the case-insensitive variant (TODO confirm).
92     * Returns -1 if there is no match.
93     */
94 Int2            MatchArraySubString(const Char **array, const Char* text);
95 Int2            MatchArrayISubString(const Char **array, const Char* text);
96 
97 /* Returns a string in which newlines are replaced by blanks and "XX" line data is skipped.
98     * NOTE(review): "bptr"/"eptr" presumably delimit the block and "col_data" the data column -- confirm. */
99 char*         GetBlkDataReplaceNewLine(char* bptr, char* eptr, Int2 col_data);
100 
101 /* Deletes any trailing ' ', '\n', '\\', ',', ';', '~', '.', ':'
102     * characters from the string.
103     */
104 void            CleanTailNoneAlphaChar(char* str);
105 void            CleanTailNoneAlphaCharInString(std::string& str); /* std::string counterpart; presumably strips the same character set -- confirm */
106 
107 char*         PointToNextToken(char* ptr); /* presumably returns a pointer to the token following the one at "ptr"; token delimiters are not visible here -- confirm */
108 
109 /* Returns the current token that "ptr" points to; when the routine returns,
110     * "ptr" points to the next token.
111     */
112 char*         GetTheCurrentToken(char** ptr);
113 
114 /* Searches for the character "letter" ("bptr" and "eptr" presumably bound the searched range -- confirm).
115     * Returns NULL if it is not found; otherwise returns a pointer to the first
116     * occurrence of the character.
117     */
118 char*         SrchTheChar(char* bptr, char* eptr, Char letter);
119 
120 /* Searches for the string "str" ("bptr" and "eptr" presumably bound the searched range -- confirm).
121     * Returns NULL if it is not found; otherwise returns a pointer to the first
122     * occurrence of the string.
123     */
124 char*         SrchTheStr(char* bptr, char* eptr, const char *str);
125 
126 void            CpSeqId(InfoBioseq* ibp, const objects::CSeq_id& id); /* presumably copies "id" into the InfoBioseq at "ibp" (name suggests "copy Seq-id") -- confirm */
127 
128 void            InfoBioseqFree(InfoBioseq* ibp); /* presumably releases resources held by "ibp" -- confirm */
129 Int2            SrchKeyword(char* ptr, KwordBlk kwl[]); /* presumably looks up the text at "ptr" in the keyword table "kwl" -- confirm meaning of the returned Int2 */
130 bool            CheckLineType(char* ptr, Int4 line, KwordBlk kwl[], bool after_origin);
131 char*         SrchNodeType(DataBlk* entry, Int4 type, size_t* len); /* "len" is a pointer, presumably an output length -- confirm */
132 DataBlk*      TrackNodeType(DataBlk* entry, Int2 type); /* NOTE(review): "type" is Int2 here but Int4 in SrchNodeType -- confirm this is intended */
133 void            fta_operon_free(FTAOperon* fop);
134 ValNode*      ConstructValNode(ValNode* head, Uint1 choice, void* data); /* pointer-payload variant; ownership of "data" is not visible here */
135 ValNode*      ConstructValNodeInt(ValNode* head, Uint1 choice, Int4 data); /* integer-payload variant */
136 bool            fta_is_tpa_keyword(const char* str);
137 bool            fta_tpa_keywords_check(const TKeywordList& kwds);
138 bool            fta_is_tsa_keyword(char* str); /* NOTE(review): takes non-const char*, unlike fta_is_tpa_keyword -- confirm whether "str" is modified */
139 bool            fta_is_tls_keyword(char* str); /* NOTE(review): takes non-const char*, unlike fta_is_tpa_keyword -- confirm whether "str" is modified */
140 bool            fta_tsa_keywords_check(const TKeywordList& kwds, Parser::ESource source);
141 bool            fta_tls_keywords_check(const TKeywordList& kwds, Parser::ESource source);
142 bool            fta_check_mga_keywords(objects::CMolInfo& mol_info, const TKeywordList& kwds); /* "mol_info" is a non-const reference and so may be modified -- confirm */
143 void            fta_StringCpy(char* dst, char* src); /* NOTE(review): "src" is non-const; overlap/size requirements are not visible here -- confirm */
144 
145 void            fta_keywords_check(const char* str, bool* estk, bool* stsk, bool* gssk,
146     bool* htck, bool* flik, bool* wgsk, bool* tpak,
147     bool* envk, bool* mgak, bool* tsak, bool* tlsk); /* takes a keyword string and eleven bool pointers, presumably per-category output flags named after the keyword kind (est, sts, gss, ...) -- confirm, including whether null pointers are accepted */
148 
149 void            fta_remove_keywords(Uint1 tech, TKeywordList& kwds); /* the list is passed by non-const reference; presumably entries selected by "tech" are removed in place -- confirm */
150 void            fta_remove_tpa_keywords(TKeywordList& kwds); /* presumably removes TPA keywords from "kwds" in place -- confirm */
151 void            fta_remove_tsa_keywords(TKeywordList& kwds, Parser::ESource source); /* presumably removes TSA keywords; role of "source" not visible here */
152 void            fta_remove_tls_keywords(TKeywordList& kwds, Parser::ESource source); /* presumably removes TLS keywords; role of "source" not visible here */
153 void            fta_remove_env_keywords(TKeywordList& kwds); /* presumably removes ENV keywords from "kwds" in place -- confirm */
154 
155 bool            IsCancelled(const TKeywordList& keywords); /* read-only query on the keyword list; exact criterion not visible here */
156 bool            HasHtg(const TKeywordList& keywords); /* read-only query; presumably true when an HTG keyword is present -- confirm */
157 void            RemoveHtgPhase(TKeywordList& keywords); /* list is passed by non-const reference, presumably edited in place -- confirm */
158 bool            HasHtc(const TKeywordList& keywords); /* read-only query; presumably true when an HTC keyword is present -- confirm */
159 bool            SetTextId(Uint1 seqtype, objects::CSeq_id& seqId, objects::CTextseq_id& textId); /* both id arguments are non-const references; which is input and which is output is not visible here -- confirm */
160 
161 void            check_est_sts_gss_tpa_kwds(ValNodePtr kwds, size_t len, /* keyword list and a length; what "len" measures is not visible here -- confirm */
162                                                   IndexblkPtr entry,
163                                                   bool tpa_check,
164                                                   bool &specialist_db, /* the four bool references are presumably outputs set by this routine -- confirm */
165                                                   bool &inferential,
166                                                   bool &experimental,
167                                                   bool &assembly);
168 
169 namespace objects {
170     class CScope; /* forward declaration; NOTE(review): <objmgr/scope.hpp> is already included by this header, so this may be redundant -- confirm */
171 }
172 
173 objects::CScope& GetScope(); /* returns a reference to a CScope; which instance it is and its lifetime are not visible here -- confirm in the implementation */
174 
175 
176 END_NCBI_SCOPE
177 
178 #endif
179