1 /*  $Id: blast_asn1_input.hpp 539637 2017-06-26 13:17:55Z boratyng $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Greg Boratyn
27  *
28  */
29 
/** @file blast_asn1_input.hpp
 * Interface for reading ASN.1 files as BLAST sequence input
 */
33 
34 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ASN1_INPUT__HPP
35 #define ALGO_BLAST_BLASTINPUT___BLAST_ASN1_INPUT__HPP
36 
37 #include <algo/blast/blastinput/blast_input.hpp>
38 #include <algo/blast/blastinput/blast_scope_src.hpp>
39 
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(blast)
42 
43 
44 /// Class representing a text or binary file containing sequences in ASN.1
45 /// format as a collection of Seq-entry objects
46 class NCBI_BLASTINPUT_EXPORT CASN1InputSourceOMF : public CBlastInputSourceOMF
47 {
48 public:
49 
50     /// Constructor
51     /// @param infile Input stream for query sequences [in]
52     /// @param is_bin Is input in binary ASN.1 format [in]
53     /// @param is_paired Are queries paired [in]
54     /// @param validate Should sequence validation be applied to each read
55     CASN1InputSourceOMF(CNcbiIstream& infile, bool is_bin = false,
56                         bool is_paired = false);
57 
58     /// Constructor for reading sequences from two files for paired short reads
59     /// @param infile1 Input stream for query sequences [in]
60     /// @param infile2 Input stream for query mates [in]
61     /// @param is_bin Is input in binary ASN.1 format [in]
62     /// @param validate Should sequence validation be applied to each read
63     CASN1InputSourceOMF(CNcbiIstream& infile1, CNcbiIstream& infile2,
64                         bool is_bin = false);
65 
~CASN1InputSourceOMF()66     virtual ~CASN1InputSourceOMF() {}
67 
68     virtual int GetNextSequence(CBioseq_set& bioseq_set);
69 
End(void)70     virtual bool End(void) {return m_InputStream->eof();}
71 
72 
73 private:
74     CASN1InputSourceOMF(const CASN1InputSourceOMF&);
75     CASN1InputSourceOMF& operator=(const CASN1InputSourceOMF&);
76 
77     /// Read one sequence from
78     CRef<CSeq_entry> x_ReadOneSeq(CNcbiIstream& instream);
79 
80     /// Read sequences from one stream
81     bool x_ReadFromSingleFile(CBioseq_set& bioseq_set);
82 
83     /// Read sequences from two streams
84     bool x_ReadFromTwoFiles(CBioseq_set& bioseq_set);
85 
86     /// Number of bases added so far
87     TSeqPos m_BasesAdded;
88     CNcbiIstream* m_InputStream;
89     // for reading paired reads from two FASTA files
90     CNcbiIstream* m_SecondInputStream;
91     /// Are queries paired
92     bool m_IsPaired;
93     /// Is input binary ASN1
94     bool m_IsBinary;
95 };
96 
97 
98 END_SCOPE(blast)
99 END_NCBI_SCOPE
100 
101 #endif  /* ALGO_BLAST_BLASTINPUT___BLAST_ASN1_INPUT__HPP */
102