1 /* $Id: seq_masker_istat_factory.hpp 575325 2018-11-27 18:22:00Z ucko $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Aleksandr Morgulis 27 * 28 * File Description: 29 * Definition for CSeqMaskerIstatFactory class. 30 * 31 */ 32 33 #ifndef C_SEQ_MASKER_ISTAT_FACTORY_H 34 #define C_SEQ_MASKER_ISTAT_FACTORY_H 35 36 #include <string> 37 38 #include <corelib/ncbitype.h> 39 #include <corelib/ncbistr.hpp> 40 #include <corelib/ncbiobj.hpp> 41 42 BEGIN_NCBI_SCOPE 43 44 class CSeqMaskerIstat; 45 46 /** 47 **\brief Factory class to generate an appropriate CSeqMaskerIstat 48 ** derived class based on the format name. 49 **/ 50 class NCBI_XALGOWINMASK_EXPORT CSeqMaskerIstatFactory 51 { 52 public: 53 54 /// Counts statistics formats. 55 /// 56 enum EStatType { 57 eAscii, 58 eBinary, 59 eOAscii, 60 eOBinary, 61 eUnknown 62 }; 63 64 /** 65 **\brief Exceptions that CSeqMaskerIstatFactory might throw. 66 **/ 67 class Exception : public CException 68 { 69 public: 70 71 enum EErrCode 72 { 73 eBadFormat, /**< Unknown file format. */ 74 eCreateFail, /**< Could not create the CSeqMaskerIstat object. */ 75 eOpen /**< Could not open file. */ 76 }; 77 78 /** 79 **\brief Get a description string for this exception. 80 **\return C-style description string 81 **/ 82 virtual const char * GetErrCodeString() const override; 83 84 NCBI_EXCEPTION_DEFAULT( Exception, CException ); 85 86 }; 87 88 /// Return the format of the counts statistics file. 89 /// 90 static EStatType DiscoverStatType( string const & name ); 91 92 /** 93 **\brief Create a unit counts container from a file. 94 ** 95 ** All parameters after name are forwarded to the constructor of the 96 ** proper subclass of CSeqMaskerIstat. 97 ** 98 **\param name name of the file containing the unit counts information 99 **\param threshold T_threshold 100 **\param textend T_extend 101 **\param max_count T_high 102 **\param use_max_count value to use for units with count > T_high 103 **\param min_count T_low 104 **\param use_min_count value to use for units with count < T_low 105 **\param use_ba use bit array optimization if available 106 **/ 107 static CSeqMaskerIstat * create( const string & name, 108 Uint4 threshold, 109 Uint4 textend, 110 Uint4 max_count, 111 Uint4 use_max_count, 112 Uint4 min_count, 113 Uint4 use_min_count, 114 bool use_ba ); 115 116 private: 117 118 /// Return the format of the counts statistics file. At the same time 119 /// return the number of metadata lines (for text formats) and size 120 /// of metadata in bytes (for binary formats). 121 /// 122 static EStatType DiscoverStatType( 123 string const & name, vector< string > & md, size_t & skip ); 124 }; 125 126 END_NCBI_SCOPE 127 128 #endif 129