1 #include <iostream>
2 #include <seqan/graph_msa.h>
3 #include <seqan/index.h>
4
5 using namespace seqan;
6 using namespace std;
7
main(int argc,const char * argv[])8 int main(int argc, const char *argv[])
9 {
10 typedef String<Dna> TSequence;
11 typedef StringSet<TSequence> TSequenceSet;
12
13 if(argc < 3)
14 {
15 ::std::cout << "\nAt least two Fasta sequence files need to be specified.\n\n";
16 ::std::cout << "Usage: ./segmentalignment <seq1.fa> <seq2.fa> ...\n\n";
17 return 1;
18 }
19
20 TSequenceSet seqs;
21 for(int i = 1; i < argc; ++i)
22 appendValue(seqs, String<Dna, FileReader<Fasta> >(argv[i]));
23
24
25 typedef Fragment<> TMatch;
26 String<TMatch> matches;
27
28 typedef Index<TSequenceSet> TIndex;
29 TIndex index(seqs);
30
31 Iterator<TIndex,Mums>::Type mumIt(index, 5);
32 String<SAValue<TIndex>::Type> occs;
33
34 while (!atEnd(mumIt))
35 {
36 occs = getOccurrences(mumIt);
37 for(unsigned i = 0; i < length(occs); ++i)
38 {
39 for(unsigned j = i+1; j < length(occs); ++j)
40 {
41 TMatch m(getValueI1(occs[i]),getValueI2(occs[i]),getValueI1(occs[j]), getValueI2(occs[j]),repLength(mumIt));
42 appendValue(matches,m);
43 }
44 }
45 ++mumIt;
46 }
47
48 typedef StringSet<TSequence,Dependent<> > TDepSequenceSet;
49 typedef Graph<Alignment<TDepSequenceSet> > TAlignmentGraph;
50 TAlignmentGraph g(seqs);
51 matchRefinement(matches, seqs, g);
52 clear(matches);
53
54 tripletLibraryExtension(g);
55
56 typedef String<double> TDistanceMatrix;
57 TDistanceMatrix distanceMatrix;
58 getDistanceMatrix(g, distanceMatrix,KmerDistance());
59
60 typedef Graph<Tree<double> > TGuideTree;
61 TGuideTree guideTree;
62 upgmaTree(distanceMatrix, guideTree);
63
64 TAlignmentGraph gOut(seqs);
65 progressiveAlignment(g, guideTree, gOut);
66 cout << gOut;
67
68 return 0;
69 }
70