1 #include <iostream>
2 #include <seqan/store.h>
3 #include <seqan/misc/interval_tree.h>
4 #include <seqan/parallel.h>
5 
6 using namespace seqan;
7 
8 
9 // define used types
10 typedef FragmentStore<>                         TStore;
11 typedef Value<TStore::TAnnotationStore>::Type   TAnnotation;
12 typedef TAnnotation::TId                        TId;
13 typedef TAnnotation::TPos                       TPos;
14 typedef IntervalAndCargo<TPos, TId>             TInterval;
15 
16 //
17 // 1. Load annotations and alignments from files
18 //
loadFiles(TStore & store,std::string const & annotationFileName,std::string const & alignmentFileName)19 bool loadFiles(TStore & store, std::string const & annotationFileName,  std::string const & alignmentFileName)
20 {
21     BamFileIn alignmentFile;
22     if (!open(alignmentFile, alignmentFileName.c_str()))
23     {
24         std::cerr << "Couldn't open alignment file " << alignmentFileName << std::endl;
25         return false;
26     }
27     std::cout << "Loading read alignments ..... " << std::flush;
28     readRecords(store, alignmentFile);
29     std::cout << "[" << length(store.alignedReadStore) << "]" << std::endl;
30 
31     // load annotations
32     GffFileIn annotationFile;
33     if (!open(annotationFile, toCString(annotationFileName)))
34     {
35         std::cerr << "Couldn't open annotation file" << annotationFileName << std::endl;
36         return false;
37     }
38     std::cout << "Loading genome annotation ... " << std::flush;
39     readRecords(store, annotationFile);
40     std::cout << "[" << length(store.annotationStore) << "]" << std::endl;
41 
42     return true;
43 }
44 
45 //![solution]
46 //
47 // 2. Extract intervals from gene annotations (grouped by contigId)
48 //
extractGeneIntervals(String<String<TInterval>> & intervals,TStore const & store)49 void extractGeneIntervals(String<String<TInterval> > & intervals, TStore const & store)
50 {
51     // extract intervals from gene annotations (grouped by contigId)
52     resize(intervals, length(store.contigStore));
53 
54     Iterator<TStore const, AnnotationTree<> >::Type it = begin(store, AnnotationTree<>());
55 
56     if (!goDown(it))
57         return;
58 
59     do
60     {
61         SEQAN_ASSERT_EQ(getType(it), "gene");
62         TPos beginPos = getAnnotation(it).beginPos;
63         TPos endPos = getAnnotation(it).endPos;
64         TId contigId = getAnnotation(it).contigId;
65 
66         if (beginPos > endPos)
67             std::swap(beginPos, endPos);
68 
69         // insert forward-strand interval of the gene and its annotation id
70         appendValue(intervals[contigId], TInterval(beginPos, endPos, value(it)));
71     }
72     while (goRight(it));
73 }
74 //![solution]
75 
76 //![main]
main(int argc,char const * argv[])77 int main(int argc, char const * argv[])
78 {
79     TStore store;
80     String<String<TInterval> > intervals;
81 
82     std::string annotationFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.gtf");
83     std::string alignmentFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.sam");
84 
85     if (!loadFiles(store, annotationFileName, alignmentFileName))
86         return 1;
87 
88     extractGeneIntervals(intervals, store);
89 
90     return 0;
91 }
92 //![main]
93