1 /* utilfun.h 2 * 3 * =========================================================================== 4 * 5 * PUBLIC DOMAIN NOTICE 6 * National Center for Biotechnology Information 7 * 8 * This software/database is a "United States Government Work" under the 9 * terms of the United States Copyright Act. It was written as part of 10 * the author's official duties as a United States Government employee and 11 * thus cannot be copyrighted. This software/database is freely available 12 * to the public for use. The National Library of Medicine and the U.S. 13 * Government have not placed any restriction on its use or reproduction. 14 * 15 * Although all reasonable efforts have been taken to ensure the accuracy 16 * and reliability of the software and data, the NLM and the U.S. 17 * Government do not and cannot warrant the performance or results that 18 * may be obtained by using this software or data. The NLM and the U.S. 19 * Government disclaim all warranties, express or implied, including 20 * warranties of performance, merchantability or fitness for any particular 21 * purpose. 22 * 23 * Please cite the author in any work or product based on this material. 24 * 25 * =========================================================================== 26 * 27 * File Name: utilfun.h 28 * 29 * Author: Karl Sirotkin, Hsiu-Chuan Chen 30 * 31 * File Description: 32 * ----------------- 33 * Utility functions for parser. 34 * 35 */ 36 37 #ifndef _UTILFUN_ 38 #define _UTILFUN_ 39 40 /* for unknown Moltype 41 */ 42 #define Unknown 0 43 44 /* for Unknown Keyword type 45 */ 46 #define ParFlat_UNKW 999 47 48 #include <objects/general/Date_std.hpp> 49 #include <objects/seqblock/GB_block.hpp> 50 #include <objmgr/scope.hpp> 51 #include <objmgr/object_manager.hpp> 52 53 #include "ftablock.h" 54 55 BEGIN_NCBI_SCOPE 56 57 CRef<objects::CDate_std> get_full_date(const Char* s, bool is_ref, Parser::ESource source); 58 59 /**********************************************************/ 60 61 /* relative routines for tokenize string 62 */ 63 TokenStatBlk* TokenString(char* str, Char delimiter); 64 TokenStatBlk* TokenStringByDelimiter(char* str, Char delimiter); 65 void FreeTokenstatblk(TokenStatBlk* tsbp); 66 void FreeTokenblk(TokenBlk* tbp); 67 bool ParseAccessionRange(TokenStatBlk* tsbp, Int4 skip); 68 bool ParseAccessionRange(list<string>& tokens, int skip); 69 void UnwrapAccessionRange(const objects::CGB_block::TExtra_accessions& extra_accs, objects::CGB_block::TExtra_accessions& hist); 70 71 /* Return array position of the matched length of string in array_string. 72 * Return -1 if no match. 73 */ 74 Int2 fta_StringMatch(const Char **array, const Char* text); 75 76 /* Return array position of the matched length of string (ignored case) 77 * in array_string. 78 * Return -1 if no match. 79 */ 80 Int2 StringMatchIcase(const Char **array, const Char* text); 81 82 char* StringIStr(const Char* where, const Char *what); 83 84 /* Return array position of the string in the array_string. 85 * Return -1 if no match. 86 */ 87 Int2 MatchArrayString(const Char **array, const Char *text); 88 Int2 MatchArrayIString(const Char **array, const Char *text); 89 90 /* Return array position of the string in the array_string if any 91 * array_string is in the substring of "text". 92 * Return -1 if no match. 93 */ 94 Int2 MatchArraySubString(const Char **array, const Char* text); 95 Int2 MatchArrayISubString(const Char **array, const Char* text); 96 97 /* Return a string which replace newline to blank and skip "XX" line data. 98 */ 99 char* GetBlkDataReplaceNewLine(char* bptr, char* eptr, Int2 col_data); 100 101 /* Delete any tailing ' ', '\n', '\\', ',', ';', '~', '.', ':' 102 * characters. 103 */ 104 void CleanTailNoneAlphaChar(char* str); 105 void CleanTailNoneAlphaCharInString(std::string& str); 106 107 char* PointToNextToken(char* ptr); 108 109 /* Return the current token which ptr points to and ptr will points to 110 * next token after the routine return. 111 */ 112 char* GetTheCurrentToken(char** ptr); 113 114 /* Search The character letter. 115 * Return NULL if not found; otherwise, return a pointer points first 116 * occurrence The character. 117 */ 118 char* SrchTheChar(char* bptr, char* eptr, Char letter); 119 120 /* Search The string. 121 * Return NULL if not found; otherwise, return a pointer points first 122 * occurrence The string. 123 */ 124 char* SrchTheStr(char* bptr, char* eptr, const char *str); 125 126 void CpSeqId(InfoBioseq* ibp, const objects::CSeq_id& id); 127 128 void InfoBioseqFree(InfoBioseq* ibp); 129 Int2 SrchKeyword(char* ptr, KwordBlk kwl[]); 130 bool CheckLineType(char* ptr, Int4 line, KwordBlk kwl[], bool after_origin); 131 char* SrchNodeType(DataBlk* entry, Int4 type, size_t* len); 132 DataBlk* TrackNodeType(DataBlk* entry, Int2 type); 133 void fta_operon_free(FTAOperon* fop); 134 ValNode* ConstructValNode(ValNode* head, Uint1 choice, void* data); 135 ValNode* ConstructValNodeInt(ValNode* head, Uint1 choice, Int4 data); 136 bool fta_is_tpa_keyword(const char* str); 137 bool fta_tpa_keywords_check(const TKeywordList& kwds); 138 bool fta_is_tsa_keyword(char* str); 139 bool fta_is_tls_keyword(char* str); 140 bool fta_tsa_keywords_check(const TKeywordList& kwds, Parser::ESource source); 141 bool fta_tls_keywords_check(const TKeywordList& kwds, Parser::ESource source); 142 bool fta_check_mga_keywords(objects::CMolInfo& mol_info, const TKeywordList& kwds); 143 void fta_StringCpy(char* dst, char* src); 144 145 void fta_keywords_check(const char* str, bool* estk, bool* stsk, bool* gssk, 146 bool* htck, bool* flik, bool* wgsk, bool* tpak, 147 bool* envk, bool* mgak, bool* tsak, bool* tlsk); 148 149 void fta_remove_keywords(Uint1 tech, TKeywordList& kwds); 150 void fta_remove_tpa_keywords(TKeywordList& kwds); 151 void fta_remove_tsa_keywords(TKeywordList& kwds, Parser::ESource source); 152 void fta_remove_tls_keywords(TKeywordList& kwds, Parser::ESource source); 153 void fta_remove_env_keywords(TKeywordList& kwds); 154 155 bool IsCancelled(const TKeywordList& keywords); 156 bool HasHtg(const TKeywordList& keywords); 157 void RemoveHtgPhase(TKeywordList& keywords); 158 bool HasHtc(const TKeywordList& keywords); 159 bool SetTextId(Uint1 seqtype, objects::CSeq_id& seqId, objects::CTextseq_id& textId); 160 161 void check_est_sts_gss_tpa_kwds(ValNodePtr kwds, size_t len, 162 IndexblkPtr entry, 163 bool tpa_check, 164 bool &specialist_db, 165 bool &inferential, 166 bool &experimental, 167 bool &assembly); 168 169 namespace objects { 170 class CScope; 171 } 172 173 objects::CScope& GetScope(); 174 175 176 END_NCBI_SCOPE 177 178 #endif 179