1 /*  $Id: blast_sra_input.hpp 543876 2017-08-15 13:17:03Z boratyng $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Greg Boratyn
27  *
28  */
29 
/** @file blast_sra_input.hpp
 * Interface for reading SRA sequences into blast input
 */
33 
34 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP
35 #define ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP
36 
37 #include <algo/blast/blastinput/blast_input.hpp>
38 #include <sra/readers/sra/csraread.hpp>
39 
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(blast)
42 
43 
44 /// Class for reading sequences from SRA respository or SRA file
45 class CSraInputSource : public CBlastInputSourceOMF, public CBlastInputSource
46 {
47 public:
48 
49     /// Constructor
50     /// @param accessions SRA accessions or files [in]
51     /// @param check_for_pairs If true, determine if reads are paired based on
52     /// information in SRA [in]
53     /// @param cache_enabled Enable caching SRA data in local files (see
54     /// File Caching at
55     /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration) [in]
56     CSraInputSource(const vector<string>& accessions,
57                     bool check_for_paires = true,
58                     bool cache_enabled = false);
59 
~CSraInputSource()60     virtual ~CSraInputSource() {}
61 
62     virtual int GetNextSequence(CBioseq_set& bioseq_set);
63 
64     virtual bool End(void);
65 
66     virtual SSeqLoc GetNextSSeqLoc(CScope& scope);
67 
68     virtual CRef<CBlastSearchQuery> GetNextSequence(CScope& scope);
69 
70 
71 private:
72     CSraInputSource(const CSraInputSource&);
73     CSraInputSource& operator=(const CSraInputSource&);
74 
75     /// Read one sequence pointed by the iterator
76     CRef<CSeq_entry> x_ReadOneSeq(void);
77 
78     /// Read one sequence pointed by the iterator and add it to the bioseq_set
79     /// object
80     CSeq_entry* x_ReadOneSeq(CBioseq_set& bioseq_set);
81 
82     /// Read one batch of sequences and mark pairs
83     void x_ReadPairs(CBioseq_set& bioseq_set);
84 
85     /// Advance to the next SRA accession
86     void x_NextAccession(void);
87 
88     /// Read the next sequence, add it to scope and return Seq-loc object
89     CRef<CSeq_loc> x_GetNextSeq_loc(CScope& scope);
90 
91     auto_ptr<CCSraDb> m_SraDb;
92     auto_ptr<CCSraShortReadIterator> m_It;
93 
94     vector<string> m_Accessions;
95     vector<string>::iterator m_ItAcc;
96 
97     /// Number of bases added so far
98     TSeqPos m_BasesAdded;
99 
100     /// Are queries paired
101     bool m_IsPaired;
102 };
103 
104 
105 END_SCOPE(blast)
106 END_NCBI_SCOPE
107 
108 #endif  /* ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP */
109