1 /*  $Id: seq_masker_istat_factory.hpp 575325 2018-11-27 18:22:00Z ucko $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Aleksandr Morgulis
27  *
28  * File Description:
29  *   Definition for CSeqMaskerIstatFactory class.
30  *
31  */
32 
33 #ifndef C_SEQ_MASKER_ISTAT_FACTORY_H
34 #define C_SEQ_MASKER_ISTAT_FACTORY_H
35 
36 #include <string>
37 
38 #include <corelib/ncbitype.h>
39 #include <corelib/ncbistr.hpp>
40 #include <corelib/ncbiobj.hpp>
41 
42 BEGIN_NCBI_SCOPE
43 
44 class CSeqMaskerIstat;
45 
46 /**
47  **\brief Factory class to generate an appropriate CSeqMaskerIstat
48  **       derived class based on the format name.
49  **/
50 class NCBI_XALGOWINMASK_EXPORT CSeqMaskerIstatFactory
51 {
52 public:
53 
54     /// Counts statistics formats.
55     ///
56     enum EStatType {
57         eAscii,
58         eBinary,
59         eOAscii,
60         eOBinary,
61         eUnknown
62     };
63 
64     /**
65         **\brief Exceptions that CSeqMaskerIstatFactory might throw.
66         **/
67     class Exception : public CException
68     {
69         public:
70 
71             enum EErrCode
72             {
73                 eBadFormat,     /**< Unknown file format. */
74                 eCreateFail,    /**< Could not create the CSeqMaskerIstat object. */
75                 eOpen           /**< Could not open file. */
76             };
77 
78             /**
79                 **\brief Get a description string for this exception.
80                 **\return C-style description string
81                 **/
82             virtual const char * GetErrCodeString() const override;
83 
84             NCBI_EXCEPTION_DEFAULT( Exception, CException );
85 
86     };
87 
88     /// Return the format of the counts statistics file.
89     ///
90     static EStatType DiscoverStatType( string const & name );
91 
92     /**
93         **\brief Create a unit counts container from a file.
94         **
95         ** All parameters after name are forwarded to the constructor of the
96         ** proper subclass of CSeqMaskerIstat.
97         **
98         **\param name name of the file containing the unit counts information
99         **\param threshold T_threshold
100         **\param textend T_extend
101         **\param max_count T_high
102         **\param use_max_count value to use for units with count > T_high
103         **\param min_count T_low
104         **\param use_min_count value to use for units with count < T_low
105         **\param use_ba use bit array optimization if available
106         **/
107     static CSeqMaskerIstat * create( const string & name,
108                                         Uint4 threshold,
109                                         Uint4 textend,
110                                         Uint4 max_count,
111                                         Uint4 use_max_count,
112                                         Uint4 min_count,
113                                         Uint4 use_min_count,
114                                         bool use_ba );
115 
116 private:
117 
118     /// Return the format of the counts statistics file. At the same time
119     /// return the number of metadata lines (for text formats) and size
120     /// of metadata in bytes (for binary formats).
121     ///
122     static EStatType DiscoverStatType(
123             string const & name, vector< string > & md, size_t & skip );
124 };
125 
126 END_NCBI_SCOPE
127 
128 #endif
129