1 #include <iostream>
2 #include <seqan/store.h>
3 #include <seqan/misc/interval_tree.h>
4 #include <seqan/parallel.h>
5
6 using namespace seqan;
7
8
9 // define used types
10 typedef FragmentStore<> TStore;
11 typedef Value<TStore::TAnnotationStore>::Type TAnnotation;
12 typedef TAnnotation::TId TId;
13 typedef TAnnotation::TPos TPos;
14 typedef IntervalAndCargo<TPos, TId> TInterval;
15
16 //
17 // 1. Load annotations and alignments from files
18 //
loadFiles(TStore & store,std::string const & annotationFileName,std::string const & alignmentFileName)19 bool loadFiles(TStore & store, std::string const & annotationFileName, std::string const & alignmentFileName)
20 {
21 BamFileIn alignmentFile;
22 if (!open(alignmentFile, alignmentFileName.c_str()))
23 {
24 std::cerr << "Couldn't open alignment file " << alignmentFileName << std::endl;
25 return false;
26 }
27 std::cout << "Loading read alignments ..... " << std::flush;
28 readRecords(store, alignmentFile);
29 std::cout << "[" << length(store.alignedReadStore) << "]" << std::endl;
30
31 // load annotations
32 GffFileIn annotationFile;
33 if (!open(annotationFile, toCString(annotationFileName)))
34 {
35 std::cerr << "Couldn't open annotation file" << annotationFileName << std::endl;
36 return false;
37 }
38 std::cout << "Loading genome annotation ... " << std::flush;
39 readRecords(store, annotationFile);
40 std::cout << "[" << length(store.annotationStore) << "]" << std::endl;
41
42 return true;
43 }
44
45 //![solution]
46 //
47 // 2. Extract intervals from gene annotations (grouped by contigId)
48 //
extractGeneIntervals(String<String<TInterval>> & intervals,TStore const & store)49 void extractGeneIntervals(String<String<TInterval> > & intervals, TStore const & store)
50 {
51 // extract intervals from gene annotations (grouped by contigId)
52 resize(intervals, length(store.contigStore));
53
54 Iterator<TStore const, AnnotationTree<> >::Type it = begin(store, AnnotationTree<>());
55
56 if (!goDown(it))
57 return;
58
59 do
60 {
61 SEQAN_ASSERT_EQ(getType(it), "gene");
62 TPos beginPos = getAnnotation(it).beginPos;
63 TPos endPos = getAnnotation(it).endPos;
64 TId contigId = getAnnotation(it).contigId;
65
66 if (beginPos > endPos)
67 std::swap(beginPos, endPos);
68
69 // insert forward-strand interval of the gene and its annotation id
70 appendValue(intervals[contigId], TInterval(beginPos, endPos, value(it)));
71 }
72 while (goRight(it));
73 }
74 //![solution]
75
76 //![main]
main(int argc,char const * argv[])77 int main(int argc, char const * argv[])
78 {
79 TStore store;
80 String<String<TInterval> > intervals;
81
82 std::string annotationFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.gtf");
83 std::string alignmentFileName = getAbsolutePath("demos/tutorial/simple_rna_seq/example.sam");
84
85 if (!loadFiles(store, annotationFileName, alignmentFileName))
86 return 1;
87
88 extractGeneIntervals(intervals, store);
89
90 return 0;
91 }
92 //![main]
93