1 //
2 // Created by mad on 10/26/15.
3 //
4 
5 #include <iostream>
6 #include <queue>
7 
8 
9 #include "SubstitutionMatrix.h"
10 #include "Sequence.h"
11 #include "Parameters.h"
12 
13 const char* binary_name = "test_kwaymerge";
14 struct KmerEntry{
15     unsigned int seqId;
16     short diagonal;
KmerEntryKmerEntry17     KmerEntry(){};
KmerEntryKmerEntry18     KmerEntry(unsigned int seqId, short diagonal) : seqId(seqId), diagonal(diagonal){}
19 };
20 
21 void mergeKmerEntryLists(KmerEntry **entries, size_t * entrySizes, const int fileCnt);
22 
main(int,const char **)23 int main (int, const char**) {
24     const int fileCnt = 3;
25     std::vector<KmerEntry> array1;
26     array1.push_back(KmerEntry(1,0));
27     array1.push_back(KmerEntry(2,10));
28     array1.push_back(KmerEntry(UINT_MAX,0));
29     array1.push_back(KmerEntry(4,0));
30     array1.push_back(KmerEntry(8,-2));
31     array1.push_back(KmerEntry(UINT_MAX,0));
32 
33     std::vector<KmerEntry> array2;
34     array2.push_back(KmerEntry(1,0));
35     array2.push_back(KmerEntry(2,2));
36     array2.push_back(KmerEntry(3,9));
37     array2.push_back(KmerEntry(UINT_MAX,0));
38     array2.push_back(KmerEntry(5,0));
39     array2.push_back(KmerEntry(10,42));
40     array2.push_back(KmerEntry(UINT_MAX,0));
41 
42 
43     std::vector<KmerEntry> array3;
44     array3.push_back(KmerEntry(1,0));
45     array3.push_back(KmerEntry(4,2));
46     array3.push_back(KmerEntry(UINT_MAX,0));
47     array3.push_back(KmerEntry(5,0));
48     array3.push_back(KmerEntry(8,42));
49     array3.push_back(KmerEntry(UINT_MAX,0));
50 
51     KmerEntry ** entries= new KmerEntry*[fileCnt];
52     entries[0]=array1.data();
53     entries[1]=array2.data();
54     entries[2]=array3.data();
55 
56     size_t * entryCnts= new size_t[fileCnt];
57     entryCnts[0]=array1.size();
58     entryCnts[1]=array2.size();
59     entryCnts[2]=array3.size();
60 
61 
62     mergeKmerEntryLists(entries, entryCnts, fileCnt);
63     delete [] entries;
64     delete [] entryCnts;
65     return 0;
66 }
67 
68 struct KmerPosition {
69     size_t kmer;
70     unsigned int id;
71     short pos;
72     unsigned int file;
73 
KmerPositionKmerPosition74     KmerPosition(){}
KmerPositionKmerPosition75     KmerPosition(size_t kmer, unsigned int id,short pos, unsigned int file):
76             kmer(kmer), id(id), pos(pos), file(file) {}
77 };
78 
79 
80 class CompareResultBySeqId {
81 public:
operator ()(KmerPosition & first,KmerPosition & second)82     bool operator() (KmerPosition & first, KmerPosition & second) {
83         //return (first.eval < second.eval);
84         if(first.kmer > second.kmer )
85             return true;
86         if(second.kmer > first.kmer )
87             return false;
88         if(first.id > second.id )
89             return true;
90         if(second.id > first.id )
91             return false;
92         if(first.pos > second.pos )
93             return true;
94         if(second.pos > first.pos )
95             return false;
96         return false;
97     }
98 };
99 
100 typedef std::priority_queue<KmerPosition, std::vector<KmerPosition>, CompareResultBySeqId> KmerPositionQueue;
101 
102 
queueNextEntry(KmerPositionQueue & queue,int file,size_t offsetPos,KmerEntry * entries,size_t entrySize)103 size_t queueNextEntry(
104         KmerPositionQueue &queue, int file,
105         size_t offsetPos,
106         KmerEntry *entries, size_t entrySize) {
107     if(offsetPos + 1 >= entrySize){
108         return offsetPos;
109     }
110     unsigned int repSeqId = entries[offsetPos].seqId;
111     size_t pos = 0;
112     while(entries[offsetPos + pos].seqId != UINT_MAX){
113         queue.push(KmerPosition(repSeqId, entries[offsetPos+pos].seqId,  entries[offsetPos+pos].diagonal, file));
114         pos++;
115     }
116     queue.push(KmerPosition(repSeqId, UINT_MAX, 0l, file));
117     pos++;
118     return offsetPos+pos;
119 }
120 
121 
mergeKmerEntryLists(KmerEntry ** entries,size_t * entrySizes,const int fileCnt)122 void mergeKmerEntryLists(KmerEntry **entries, size_t * entrySizes, const int fileCnt) {
123     KmerPositionQueue queue;
124     size_t * offsetPos = new size_t[fileCnt];
125     for(int file = 0; file < fileCnt; file++ ){
126         offsetPos[file]=queueNextEntry(queue, file, 0, entries[file], entrySizes[file]);
127     }
128     KmerPosition prevsKmerPos;
129     prevsKmerPos.id = UINT_MAX;
130     while(queue.empty() == false) {
131         KmerPosition res = queue.top();
132         queue.pop();
133         if(res.id==UINT_MAX){
134             offsetPos[res.file] = queueNextEntry(queue, res.file, offsetPos[res.file],
135                                                  entries[res.file], entrySizes[res.file]);
136         }
137         // if its not a duplicate
138         if(prevsKmerPos.id != res.id && res.id!=UINT_MAX){
139             std::cout << res.kmer << "\t" << res.id <<"\t"<<res.pos << "\t"<< res.file << std::endl;
140 
141         }
142         prevsKmerPos = res;
143 
144     }
145     delete [] offsetPos;
146 }
147 
148