1 //
2 // Created by mad on 10/26/15.
3 //
4
5 #include <iostream>
6 #include <queue>
7
8
9 #include "SubstitutionMatrix.h"
10 #include "Sequence.h"
11 #include "Parameters.h"
12
13 const char* binary_name = "test_kwaymerge";
14 struct KmerEntry{
15 unsigned int seqId;
16 short diagonal;
KmerEntryKmerEntry17 KmerEntry(){};
KmerEntryKmerEntry18 KmerEntry(unsigned int seqId, short diagonal) : seqId(seqId), diagonal(diagonal){}
19 };
20
21 void mergeKmerEntryLists(KmerEntry **entries, size_t * entrySizes, const int fileCnt);
22
main(int,const char **)23 int main (int, const char**) {
24 const int fileCnt = 3;
25 std::vector<KmerEntry> array1;
26 array1.push_back(KmerEntry(1,0));
27 array1.push_back(KmerEntry(2,10));
28 array1.push_back(KmerEntry(UINT_MAX,0));
29 array1.push_back(KmerEntry(4,0));
30 array1.push_back(KmerEntry(8,-2));
31 array1.push_back(KmerEntry(UINT_MAX,0));
32
33 std::vector<KmerEntry> array2;
34 array2.push_back(KmerEntry(1,0));
35 array2.push_back(KmerEntry(2,2));
36 array2.push_back(KmerEntry(3,9));
37 array2.push_back(KmerEntry(UINT_MAX,0));
38 array2.push_back(KmerEntry(5,0));
39 array2.push_back(KmerEntry(10,42));
40 array2.push_back(KmerEntry(UINT_MAX,0));
41
42
43 std::vector<KmerEntry> array3;
44 array3.push_back(KmerEntry(1,0));
45 array3.push_back(KmerEntry(4,2));
46 array3.push_back(KmerEntry(UINT_MAX,0));
47 array3.push_back(KmerEntry(5,0));
48 array3.push_back(KmerEntry(8,42));
49 array3.push_back(KmerEntry(UINT_MAX,0));
50
51 KmerEntry ** entries= new KmerEntry*[fileCnt];
52 entries[0]=array1.data();
53 entries[1]=array2.data();
54 entries[2]=array3.data();
55
56 size_t * entryCnts= new size_t[fileCnt];
57 entryCnts[0]=array1.size();
58 entryCnts[1]=array2.size();
59 entryCnts[2]=array3.size();
60
61
62 mergeKmerEntryLists(entries, entryCnts, fileCnt);
63 delete [] entries;
64 delete [] entryCnts;
65 return 0;
66 }
67
68 struct KmerPosition {
69 size_t kmer;
70 unsigned int id;
71 short pos;
72 unsigned int file;
73
KmerPositionKmerPosition74 KmerPosition(){}
KmerPositionKmerPosition75 KmerPosition(size_t kmer, unsigned int id,short pos, unsigned int file):
76 kmer(kmer), id(id), pos(pos), file(file) {}
77 };
78
79
80 class CompareResultBySeqId {
81 public:
operator ()(KmerPosition & first,KmerPosition & second)82 bool operator() (KmerPosition & first, KmerPosition & second) {
83 //return (first.eval < second.eval);
84 if(first.kmer > second.kmer )
85 return true;
86 if(second.kmer > first.kmer )
87 return false;
88 if(first.id > second.id )
89 return true;
90 if(second.id > first.id )
91 return false;
92 if(first.pos > second.pos )
93 return true;
94 if(second.pos > first.pos )
95 return false;
96 return false;
97 }
98 };
99
100 typedef std::priority_queue<KmerPosition, std::vector<KmerPosition>, CompareResultBySeqId> KmerPositionQueue;
101
102
queueNextEntry(KmerPositionQueue & queue,int file,size_t offsetPos,KmerEntry * entries,size_t entrySize)103 size_t queueNextEntry(
104 KmerPositionQueue &queue, int file,
105 size_t offsetPos,
106 KmerEntry *entries, size_t entrySize) {
107 if(offsetPos + 1 >= entrySize){
108 return offsetPos;
109 }
110 unsigned int repSeqId = entries[offsetPos].seqId;
111 size_t pos = 0;
112 while(entries[offsetPos + pos].seqId != UINT_MAX){
113 queue.push(KmerPosition(repSeqId, entries[offsetPos+pos].seqId, entries[offsetPos+pos].diagonal, file));
114 pos++;
115 }
116 queue.push(KmerPosition(repSeqId, UINT_MAX, 0l, file));
117 pos++;
118 return offsetPos+pos;
119 }
120
121
mergeKmerEntryLists(KmerEntry ** entries,size_t * entrySizes,const int fileCnt)122 void mergeKmerEntryLists(KmerEntry **entries, size_t * entrySizes, const int fileCnt) {
123 KmerPositionQueue queue;
124 size_t * offsetPos = new size_t[fileCnt];
125 for(int file = 0; file < fileCnt; file++ ){
126 offsetPos[file]=queueNextEntry(queue, file, 0, entries[file], entrySizes[file]);
127 }
128 KmerPosition prevsKmerPos;
129 prevsKmerPos.id = UINT_MAX;
130 while(queue.empty() == false) {
131 KmerPosition res = queue.top();
132 queue.pop();
133 if(res.id==UINT_MAX){
134 offsetPos[res.file] = queueNextEntry(queue, res.file, offsetPos[res.file],
135 entries[res.file], entrySizes[res.file]);
136 }
137 // if its not a duplicate
138 if(prevsKmerPos.id != res.id && res.id!=UINT_MAX){
139 std::cout << res.kmer << "\t" << res.id <<"\t"<<res.pos << "\t"<< res.file << std::endl;
140
141 }
142 prevsKmerPos = res;
143
144 }
145 delete [] offsetPos;
146 }
147
148