1 /* $Id: blast_sra_input.hpp 543876 2017-08-15 13:17:03Z boratyng $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Greg Boratyn 27 * 28 */ 29 30 /** @file blast_fasta_input.hpp 31 * Interface for reading SRA sequences into blast input 32 */ 33 34 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP 35 #define ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP 36 37 #include <algo/blast/blastinput/blast_input.hpp> 38 #include <sra/readers/sra/csraread.hpp> 39 40 BEGIN_NCBI_SCOPE 41 BEGIN_SCOPE(blast) 42 43 44 /// Class for reading sequences from SRA respository or SRA file 45 class CSraInputSource : public CBlastInputSourceOMF, public CBlastInputSource 46 { 47 public: 48 49 /// Constructor 50 /// @param accessions SRA accessions or files [in] 51 /// @param check_for_pairs If true, determine if reads are paired based on 52 /// information in SRA [in] 53 /// @param cache_enabled Enable caching SRA data in local files (see 54 /// File Caching at 55 /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration) [in] 56 CSraInputSource(const vector<string>& accessions, 57 bool check_for_paires = true, 58 bool cache_enabled = false); 59 ~CSraInputSource()60 virtual ~CSraInputSource() {} 61 62 virtual int GetNextSequence(CBioseq_set& bioseq_set); 63 64 virtual bool End(void); 65 66 virtual SSeqLoc GetNextSSeqLoc(CScope& scope); 67 68 virtual CRef<CBlastSearchQuery> GetNextSequence(CScope& scope); 69 70 71 private: 72 CSraInputSource(const CSraInputSource&); 73 CSraInputSource& operator=(const CSraInputSource&); 74 75 /// Read one sequence pointed by the iterator 76 CRef<CSeq_entry> x_ReadOneSeq(void); 77 78 /// Read one sequence pointed by the iterator and add it to the bioseq_set 79 /// object 80 CSeq_entry* x_ReadOneSeq(CBioseq_set& bioseq_set); 81 82 /// Read one batch of sequences and mark pairs 83 void x_ReadPairs(CBioseq_set& bioseq_set); 84 85 /// Advance to the next SRA accession 86 void x_NextAccession(void); 87 88 /// Read the next sequence, add it to scope and return Seq-loc object 89 CRef<CSeq_loc> x_GetNextSeq_loc(CScope& scope); 90 91 auto_ptr<CCSraDb> m_SraDb; 92 auto_ptr<CCSraShortReadIterator> m_It; 93 94 vector<string> m_Accessions; 95 vector<string>::iterator m_ItAcc; 96 97 /// Number of bases added so far 98 TSeqPos m_BasesAdded; 99 100 /// Are queries paired 101 bool m_IsPaired; 102 }; 103 104 105 END_SCOPE(blast) 106 END_NCBI_SCOPE 107 108 #endif /* ALGO_BLAST_BLASTINPUT___BLAST_SRA_INPUT__HPP */ 109