1 /*==========================================================================
2 SeqAn - The Library for Sequence Analysis
3 http://www.seqan.de
4 ============================================================================
5 Copyright (C) 2007
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 ============================================================================
18 $Id$
19 ==========================================================================*/
20
21 #ifndef SEQAN_HEADER_CREATE_GFF_H
22 #define SEQAN_HEADER_CREATE_GFF_H
23
24 namespace seqan
25 {
26 //////////////////////////////////////////////////////////////////////////////
27 //create readCountGFF
28 //////////////////////////////////////////////////////////////////////////////
29
30 template<typename TFile, typename TReadAnnoStore, typename TSpec, typename TConfig>
31 inline void
createReadCountGFF(TFile & readOutput,TReadAnnoStore & readAnnoStore,FragmentStore<TSpec,TConfig> & fragStore)32 createReadCountGFF(TFile & readOutput, TReadAnnoStore & readAnnoStore, FragmentStore<TSpec, TConfig> & fragStore)
33 {
34 typedef typename Iterator<TReadAnnoStore>::Type TCountIter;
35 typedef typename Value<TReadAnnoStore>::Type TReadAnnoStoreElement;
36 typedef typename TReadAnnoStoreElement::TAnnoIds TAnnoIds;
37 typedef typename Iterator<TAnnoIds>::Type TAnnoIdsIter;
38 typedef typename Value<TAnnoIds>::Type TIds;
39 typedef typename Iterator<TIds>::Type TIdsIter;
40
41 typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore TAnnotationStore;
42 typedef typename Value<TAnnotationStore>::Type TAnnotationStoreElement;
43 typedef typename TAnnotationStoreElement::TId TId;
44
45 static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
46
47 TCountIter itCountStore = begin(readAnnoStore);
48 TCountIter itCountStoreEnd = end(readAnnoStore);
49 TAnnoIdsIter itAnnoIds;
50 TAnnoIdsIter itAnnoIdsEnd;
51 TId firstId;
52 TIds allParentIds;
53 TIdsIter itId;
54 TIdsIter itIdEnd;
55 TIdsIter itP;
56 TIdsIter itPEnd;
57 bool help;
58 bool invalid;
59 for ( ; itCountStore != itCountStoreEnd; goNext(itCountStore))
60 {
61 // read-name:
62 readOutput << getValue(fragStore.readNameStore, position(itCountStore, readAnnoStore))
63 << '\t';
64
65 if (empty(getValue(itCountStore).annoIds) )
66 {
67 readOutput << ".\t.\t.\t.\t.";
68 }
69 else
70 {
71 // contig-name:
72 readOutput << getValue(fragStore.contigNameStore, getValue(itCountStore).contigId)
73 << '\t';
74
75 itAnnoIds = begin(getValue(itCountStore).annoIds);
76 itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
77 while (itAnnoIds != itAnnoIdsEnd && front(*itAnnoIds) == INVALID_ID)
78 {
79 goNext(itAnnoIds);
80 }
81
82 if (itAnnoIds != itAnnoIdsEnd) // not only INVALID_IDS
83 {
84 firstId = front(*itAnnoIds);
85
86 // orientation:
87 if (getValue(fragStore.annotationStore, firstId).beginPos <= getValue(fragStore.annotationStore, firstId).endPos)
88 {
89 readOutput << "+\t";
90 }
91 else
92 {
93 readOutput << "-\t";
94 }
95
96 // Annotation-Ids:
97
98 allParentIds = getValue(itCountStore).parentIds;
99 // output for first parentId if possible; get other parentIds, in which the read doesn't map (entirely with all of his intervals)
100 itAnnoIds = begin(getValue(itCountStore).annoIds);
101 itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
102 for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
103 {
104 help = false;
105 itId = begin(*itAnnoIds);
106 itIdEnd = end(*itAnnoIds);
107 for ( ; itId != itIdEnd; goNext(itId))
108 {
109 if (!empty(getValue(itCountStore).parentIds) && getValue(itId) != INVALID_ID && // if current parentId == first id of parentIds (of read entry in readAnnoStore)
110 getValue(fragStore.annotationStore, getValue(itId)).parentId == front(allParentIds))
111 {
112 if (help) // not the first annotation for this read-interval -> ";" sign for overlapping annotations
113 readOutput << ';';
114 readOutput << getValue(fragStore.annotationNameStore, getValue(itId));
115 help = true;
116 }
117 else if (getValue(itId) != INVALID_ID && getValue(fragStore.annotationStore, getValue(itId)).parentId != INVALID_ID && // get other parentIds
118 !isElement_unsorted(getValue(fragStore.annotationStore, getValue(itId)).parentId, allParentIds) ) //?
119 {
120 appendValue(allParentIds, getValue(fragStore.annotationStore, getValue(itId)).parentId, Generous() );
121 }
122 }
123 if ( !empty(getValue(itCountStore).parentIds) && position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
124 readOutput << ':';
125 }
126 if (!empty(getValue(itCountStore).parentIds))
127 readOutput << '\t' << getValue(fragStore.annotationNameStore, front(allParentIds)) << '\t';
128 // outputs for all other parentIds
129 itP = begin(allParentIds);
130 itPEnd = end(allParentIds);
131 if (!empty(getValue(itCountStore).parentIds)) goNext(itP);
132 for ( ; itP != itPEnd; goNext(itP))
133 {
134 itAnnoIds = begin(getValue(itCountStore).annoIds);
135 itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
136 for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
137 {
138 invalid = true; // if no annotation for the current parent in interval -> UNKOWN_REGION
139 itId = begin(*itAnnoIds);
140 itIdEnd = end(*itAnnoIds);
141 for ( ; itId != itIdEnd; goNext(itId))
142 {
143 if (getValue(itId) != INVALID_ID && getValue(fragStore.annotationStore, getValue(itId)).parentId == getValue(itP))
144 {
145 if (!invalid) // not the first annotation for this interval -> ";" sign for overlapping annotations
146 readOutput << ';';
147 readOutput << getValue(fragStore.annotationNameStore, getValue(itId));
148 invalid = false;
149 }
150 }
151 if (invalid)
152 readOutput << "UNKNOWN_REGION";
153 if (position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
154 readOutput << ':';
155 }
156 readOutput << '\t';
157 if (getValue(itP) != INVALID_ID)
158 readOutput << getValue(fragStore.annotationNameStore, getValue(itP));
159 else
160 readOutput << "NO_PARENT";
161 readOutput << '\t';
162 }
163 }
164 else // only INVALID_IDS
165 {
166 readOutput << ".\t";
167
168 // invalid_ids for each interval
169 itAnnoIds = begin(getValue(itCountStore).annoIds);
170 itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
171 for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
172 {
173 readOutput << "UNKNOWN_REGION";
174 if (position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
175 readOutput << ':';
176 }
177 readOutput << "\tUNKNOWN_REGION";
178 }
179 }
180 readOutput << "\n";
181 }
182 }
183
184
185
186 //////////////////////////////////////////////////////////////////////////////
187 //create AnnoCountGFF
188 //////////////////////////////////////////////////////////////////////////////
189
190 template<typename TFile, typename TAnnoCountStore, typename TAnnoNormStore, typename TSpec, typename TConfig, typename TMap>
191 inline void
createAnnoCountGFF(TFile & annoOutput,TAnnoCountStore & annoCountStore,TAnnoNormStore & annoNormStore,FragmentStore<TSpec,TConfig> & fragStore,TMap & mapO)192 createAnnoCountGFF(TFile & annoOutput, TAnnoCountStore & annoCountStore, TAnnoNormStore &annoNormStore, FragmentStore<TSpec, TConfig> & fragStore, TMap &mapO)
193 {
194 typedef typename FragmentStore<TSpec, TConfig>::TContigPos TContigPos;
195 typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore TAnnotationStore;
196 typedef typename Value<TAnnotationStore>::Type TAnnotationStoreElement;
197 typedef typename TAnnotationStoreElement::TId TId;
198 typedef typename Iterator<TAnnoCountStore>::Type TCountIter;
199 typedef typename Iterator<TAnnoNormStore>::Type TNormIter;
200 typedef typename Iterator<TAnnotationStore>::Type TAnnoIter;
201
202 static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
203 static const TContigPos INVALID_POS = TAnnotationStoreElement::INVALID_POS;
204
205 TCountIter itCount = begin(annoCountStore);
206 TCountIter itCountEnd = end(annoCountStore);
207 TAnnoIter itAnno = begin(fragStore.annotationStore);
208 TNormIter itNorm = begin(annoNormStore);
209
210 for ( ; itCount != itCountEnd; goNext(itCount), goNext(itAnno), goNext(itNorm))
211 {
212 if (getValue(itAnno).typeId != INVALID_ID)
213 if (fragStore.annotationTypeStore[getValue(itAnno).typeId] == "<root>") continue;
214 // contig-name
215 if (getValue(itAnno).contigId == INVALID_ID )
216 annoOutput << "INVALID_ID\t";
217 else
218 annoOutput << getValue(fragStore.contigNameStore, getValue(itAnno).contigId) << '\t';
219 annoOutput << "Annotation_Count\tregion\t";
220 // startposition endposition orientation .
221 if (getValue(itAnno).beginPos == INVALID_POS)
222 {
223 annoOutput << ".\t.\t" << getValue(itCount);
224 if (getValue(itAnno).parentId == INVALID_ID || (fragStore.annotationStore[getValue(itAnno).parentId].typeId != INVALID_ID && fragStore.annotationTypeStore[fragStore.annotationStore[getValue(itAnno).parentId].typeId] == "<root>"))
225 {
226 if (mapValue(mapO, position(itAnno, fragStore.annotationStore)) == 0)
227 annoOutput << "\t+\t.\t";
228 else
229 annoOutput << "\t-\t.\t";
230 }
231 else
232 {
233 annoOutput << "\t.\t.\t";
234 }
235 }
236 else
237 {
238 if (getValue(itAnno).beginPos <= getValue(itAnno).endPos)
239 annoOutput << getValue(itAnno).beginPos + 1
240 << '\t'
241 << getValue(itAnno).endPos
242 << '\t'
243 << getValue(itCount)
244 << "\t+\t.\t";
245 else
246 annoOutput << getValue(itAnno).endPos + 1
247 << '\t'
248 << getValue(itAnno).beginPos
249 << '\t'
250 << getValue(itCount)
251 << "\t-\t.\t";
252 }
253 // annotation-name (parent annotation-name)
254 if (getValue(itAnno).parentId == INVALID_ID || (fragStore.annotationStore[getValue(itAnno).parentId].typeId != INVALID_ID && fragStore.annotationTypeStore[fragStore.annotationStore[getValue(itAnno).parentId].typeId] == "<root>"))
255 annoOutput << "ID="
256 << getValue(fragStore.annotationNameStore, position(itAnno, fragStore.annotationStore))
257 << ';';
258 else
259 annoOutput << "ID="
260 << getValue(fragStore.annotationNameStore, position(itAnno, fragStore.annotationStore))
261 << ";ParentID="
262 << getValue(fragStore.annotationNameStore, getValue(itAnno).parentId)
263 << ';';
264 annoOutput << formattedNumber("%f", *itNorm) << ";\n";
265 }
266 }
267
268
269 //////////////////////////////////////////////////////////////////////////////
270 //create tupleCountGFF
271 //////////////////////////////////////////////////////////////////////////////
272
273 template<typename TFile, typename TTupleCountStore, typename TSpec, typename TConfig>
274 inline void
createTupleCountGFF(TFile & tupleOutput,TTupleCountStore & tupleCountStore,FragmentStore<TSpec,TConfig> & fragStore,unsigned thresholdCount,double thresholdRPKM)275 createTupleCountGFF(TFile & tupleOutput, TTupleCountStore & tupleCountStore, FragmentStore<TSpec, TConfig> & fragStore, unsigned thresholdCount, double thresholdRPKM)
276 {
277 typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore TAnnotationStore;
278 typedef typename Value<TAnnotationStore>::Type TAnnotationStoreElement;
279 typedef typename TAnnotationStoreElement::TId TId;
280
281 typedef typename Iterator<TTupleCountStore>::Type TCountStoreIter;
282 typedef typename Value<TTupleCountStore>::Type TTupleCountStoreElement;
283 typedef typename TTupleCountStoreElement::TTupleList TTupleList;
284 typedef typename TTupleCountStoreElement::TTupleCounts TTupleCounts;
285 typedef typename TTupleCountStoreElement::TTupleNorm TTupleNorm;
286 typedef typename Value<TTupleList>::Type TTupel;
287 typedef typename Iterator<TTupleList>::Type TTupleListIter;
288 typedef typename Iterator<TTupleCounts>::Type TCountIter;
289 typedef typename Iterator<TTupleNorm>::Type TNormIter;
290 typedef typename Iterator<TTupel>::Type TTupelIter;
291
292 static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
293
294 if (!empty(tupleCountStore))
295 {
296 TCountStoreIter itCountStore = begin(tupleCountStore);
297 TCountStoreIter itCountStoreEnd = end(tupleCountStore);
298 TAnnotationStoreElement currentElement;
299 TTupleListIter itT;
300 TTupleListIter itTEnd;
301 TCountIter itC;
302 TNormIter itN;
303 TTupelIter itId;
304 TTupelIter itIdEnd;
305 for ( ; itCountStore != itCountStoreEnd; goNext(itCountStore))
306 {
307 currentElement = getValue(fragStore.annotationStore, position(itCountStore, tupleCountStore));
308
309 itT = begin(getValue(itCountStore).readConnections);
310 itTEnd = end(getValue(itCountStore).readConnections);
311 itC = begin(getValue(itCountStore).readConnectionCounts);
312 itN = begin(getValue(itCountStore).readConnectionNorm);
313 // read connections:
314 for ( ; itT != itTEnd; goNext(itT), goNext(itC), goNext(itN))
315 {
316 if (getValue(itC) >= thresholdCount && getValue(itN) >= thresholdRPKM)
317 {
318 // contig-name
319 tupleOutput << getValue(fragStore.contigNameStore, currentElement.contigId) << '\t';
320 // parent-name
321 if (currentElement.parentId == INVALID_ID )
322 tupleOutput << "NO_PARENT\t";
323 else
324 tupleOutput << getValue(fragStore.annotationNameStore, currentElement.parentId) << '\t';
325 // orientation
326 if ( currentElement.beginPos <= currentElement.endPos )
327 tupleOutput << "+\t";
328 else
329 tupleOutput << "-\t";
330 // first annotationId of tuple (store implicit)
331 tupleOutput << getValue(fragStore.annotationNameStore, position(itCountStore, tupleCountStore));
332 // other annotationIds
333 itId = begin(getValue(itT));
334 itIdEnd = end(getValue(itT));
335 for ( ; itId != itIdEnd; goNext(itId))
336 tupleOutput << ":" << getValue(fragStore.annotationNameStore, getValue(itId));
337 tupleOutput << '\t';
338 // tuple count
339 tupleOutput << getValue(itC) << '\t';
340 // normalized tuple count
341 tupleOutput << formattedNumber("%f", *itN) << '\n';
342 }
343 }
344 //matepair connections:
345 itT = begin(getValue(itCountStore).matePairConnections);
346 itTEnd = end(getValue(itCountStore).matePairConnections);
347 itC = begin(getValue(itCountStore).matePairConnectionCounts);
348 itN = begin(getValue(itCountStore).matePairConnectionNorm);
349 for ( ; itT != itTEnd; goNext(itT), goNext(itC), goNext(itN))
350 {
351 if (getValue(itC) >= thresholdCount && getValue(itN) >= thresholdRPKM)
352 {
353 // contig-name
354 tupleOutput << getValue(fragStore.contigNameStore, currentElement.contigId) << '\t';
355 // parent-name
356 if (currentElement.parentId == INVALID_ID )
357 tupleOutput << "NO_PARENT\t";
358 else
359 tupleOutput << getValue(fragStore.annotationNameStore, currentElement.parentId) << '\t';
360 // orientation
361 if ( currentElement.beginPos <= currentElement.endPos )
362 tupleOutput << "+\t";
363 else
364 tupleOutput << "-\t";
365 // first annotationId of tuple
366 tupleOutput << getValue(fragStore.annotationNameStore, position(itCountStore, tupleCountStore));
367 // other annotationIds of first read
368 itId = begin(getValue(itT));
369 itIdEnd = end(getValue(itT));
370 for ( ; itId != itIdEnd && getValue(itId) != INVALID_ID; goNext(itId))
371 tupleOutput << ":" << getValue(fragStore.annotationNameStore, getValue(itId));
372 goNext(itId);
373 tupleOutput << '^';
374 // annotationIds of second read
375 tupleOutput << getValue(fragStore.annotationNameStore, getValue(itId));
376 goNext(itId);
377 for ( ; itId != itIdEnd; goNext(itId))
378 tupleOutput << ':' << getValue(fragStore.annotationNameStore, getValue(itId));
379 tupleOutput << '\t';
380 // tuple count
381 tupleOutput << *itC << '\t';
382 // normalized tuple count
383 tupleOutput << formattedNumber("%f", *itN) << '\n';
384 }
385 }
386 }
387 }
388 }
389
390
391 //////////////////////////////////////////////////////////////////////////////
392
393
394 }// namespace seqan
395
396 #endif //#ifndef SEQAN_HEADER_...
397