1 //![header]
2 /*==========================================================================
3   SeqAn - The Library for Sequence Analysis
4   http://www.seqan.de
5   ===========================================================================
6   Copyright (C) 2010
7 
8   This program is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20   ===========================================================================
21   Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
22   ===========================================================================
23   Minimapper -- Minimal read mapping in SeqAn.
24   ===========================================================================
25   This file contains code for a minimal read mapper with heavy restrictions.
26   The restrictions are explained in the tutorial chapter, together with
27   suggestions on how to extend this code.
28   ===========================================================================*/
29 //![header]
30 //![includes]
31 #include <cstdio>
32 #include <iostream>
33 
34 #include <seqan/basic.h>
35 #include <seqan/stream.h>
36 #include <seqan/index.h>
37 #include <seqan/store.h>
38 
39 using namespace seqan;
40 //![includes]
41 
42 //![typedefs]
43 // Some typedefs.
44 typedef FragmentStore<>::TReadSeqStore TReadSeqStore;
45 typedef Value<TReadSeqStore>::Type TReadSeq;
46 typedef FragmentStore<>::TContigStore TContigStore;
47 typedef Value<TContigStore>::Type TContigStoreElement;
48 typedef TContigStoreElement::TContigSeq TContigSeq;
49 typedef Index<TReadSeqStore, IndexQGram<UngappedShape<11>, OpenAddressing> > TIndex;
50 typedef Pattern<TIndex, Swift<SwiftSemiGlobal> > TPattern;
51 typedef Finder<TContigSeq, Swift<SwiftSemiGlobal> > TFinder;
52 typedef FragmentStore<>::TAlignedReadStore TAlignedReadStore;
53 typedef Value<TAlignedReadStore>::Type TAlignedRead;
54 //![typedefs]
55 
56 //![global-constants]
57 const double EPSILON = 0.08;
58 //![global-constants]
59 
60 //![main-input]
main(int argc,char * argv[])61 int main(int argc, char * argv[])
62 {
63     // 0) Handle command line arguments.
64     if (argc < 3)
65     {
66         std::cerr << "Invalid number of arguments." << std::endl
67                   << "USAGE: minimapper GENOME.fasta READS.fasta OUT.sam" << std::endl;
68         return 1;
69     }
70 
71     // 1) Load contigs and reads.
72     FragmentStore<> fragStore;
73     if (!loadContigs(fragStore, argv[1]))
74         return 1;
75 
76     if (!loadReads(fragStore, argv[2]))
77         return 1;
78 //![main-input]
79 
80 //![pattern-finder]
81     // 2) Build an index over all reads and a SWIFT pattern over this index.
82     TIndex index(fragStore.readSeqStore);
83     TPattern pattern(index);
84 //![pattern-finder]
85 
86 //![swift]
87     // 3) Enumerate all epsilon matches.
88     for (unsigned i = 0; i < length(fragStore.contigStore); ++i)
89     {
90         TFinder finder(fragStore.contigStore[i].seq);
91         while (find(finder, pattern, EPSILON))
92         {
93 //![swift]
94 //![verification]
95             // Verify match.
96             Finder<TContigSeq> verifyFinder(fragStore.contigStore[i].seq);
97             setPosition(verifyFinder, beginPosition(finder));
98             Pattern<TReadSeq, HammingSimple> verifyPattern(fragStore.readSeqStore[position(pattern).i1]);
99             unsigned readLength = length(fragStore.readSeqStore[position(pattern).i1]);
100             int minScore = -static_cast<int>(EPSILON * readLength);
101             while (find(verifyFinder, verifyPattern, minScore) && position(verifyFinder) < endPosition(infix(finder)))
102             {
103                 TAlignedRead match(length(fragStore.alignedReadStore), position(pattern).i1, i,
104                                    beginPosition(verifyFinder), endPosition(verifyFinder));
105                 appendValue(fragStore.alignedReadStore, match);
106             }
107         }
108     }
109 //![verification]
110 
111 //![main-output]
112     // 4) Write out SAM file.
113     BamFileOut bamFile(argv[3]);
114     writeRecords(bamFile, fragStore);
115 
116     return 0;
117 }
118 //![main-output]
119