1  /*==========================================================================
2                 SeqAn - The Library for Sequence Analysis
3                           http://www.seqan.de
4  ============================================================================
5   Copyright (C) 2007
6 
7   This library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 3 of the License, or (at your option) any later version.
11 
12   This library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16 
17  ============================================================================
18   $Id$
19  ==========================================================================*/
20 
21 #ifndef SEQAN_HEADER_CREATE_GFF_H
22 #define SEQAN_HEADER_CREATE_GFF_H
23 
24 namespace seqan
25 {
26 //////////////////////////////////////////////////////////////////////////////
27 //create readCountGFF
28 //////////////////////////////////////////////////////////////////////////////
29 
30 template<typename TFile, typename TReadAnnoStore, typename TSpec, typename TConfig>
31 inline void
createReadCountGFF(TFile & readOutput,TReadAnnoStore & readAnnoStore,FragmentStore<TSpec,TConfig> & fragStore)32 createReadCountGFF(TFile & readOutput, TReadAnnoStore & readAnnoStore, FragmentStore<TSpec, TConfig> & fragStore)
33 {
34 	typedef typename Iterator<TReadAnnoStore>::Type 			TCountIter;
35 	typedef typename Value<TReadAnnoStore>::Type				TReadAnnoStoreElement;
36 	typedef typename TReadAnnoStoreElement::TAnnoIds			TAnnoIds;
37 	typedef typename Iterator<TAnnoIds>::Type				TAnnoIdsIter;
38 	typedef typename Value<TAnnoIds>::Type					TIds;
39 	typedef typename Iterator<TIds>::Type					TIdsIter;
40 
41 	typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore 	TAnnotationStore;
42 	typedef typename Value<TAnnotationStore>::Type 				TAnnotationStoreElement;
43 	typedef typename TAnnotationStoreElement::TId 				TId;
44 
45 	static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
46 
47 	TCountIter itCountStore = begin(readAnnoStore);
48 	TCountIter itCountStoreEnd = end(readAnnoStore);
49 	TAnnoIdsIter itAnnoIds;
50 	TAnnoIdsIter itAnnoIdsEnd;
51 	TId firstId;
52 	TIds allParentIds;
53 	TIdsIter itId;
54 	TIdsIter itIdEnd;
55 	TIdsIter itP;
56 	TIdsIter itPEnd;
57 	bool help;
58 	bool invalid;
59 	for ( ; itCountStore != itCountStoreEnd; goNext(itCountStore))
60 	{
61 		// read-name:
62         readOutput << getValue(fragStore.readNameStore, position(itCountStore, readAnnoStore))
63                    << '\t';
64 
65 		if (empty(getValue(itCountStore).annoIds) )
66 		{
67             readOutput << ".\t.\t.\t.\t.";
68 		}
69 		else
70 		{
71 			// contig-name:
72 			readOutput << getValue(fragStore.contigNameStore, getValue(itCountStore).contigId)
73                                    << '\t';
74 
75 			itAnnoIds = begin(getValue(itCountStore).annoIds);
76 			itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
77 			while (itAnnoIds != itAnnoIdsEnd && front(*itAnnoIds) == INVALID_ID)
78 			{
79 				goNext(itAnnoIds);
80 			}
81 
82 			if (itAnnoIds != itAnnoIdsEnd) // not only INVALID_IDS
83 			{
84 				firstId = front(*itAnnoIds);
85 
86 				// orientation:
87 				if (getValue(fragStore.annotationStore, firstId).beginPos <= getValue(fragStore.annotationStore, firstId).endPos)
88 				{
89                     readOutput << "+\t";
90 				}
91 				else
92                 {
93                     readOutput << "-\t";
94                 }
95 
96 				// Annotation-Ids:
97 
98 				allParentIds =  getValue(itCountStore).parentIds;
99 				// output for first parentId if possible; get other parentIds, in which the read doesn't map (entirely with all of his intervals)
100 				itAnnoIds = begin(getValue(itCountStore).annoIds);
101 				itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
102 				for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
103 				{
104 					help = false;
105 					itId = begin(*itAnnoIds);
106 					itIdEnd = end(*itAnnoIds);
107 					for ( ; itId != itIdEnd; goNext(itId))
108 					{
109 						if (!empty(getValue(itCountStore).parentIds) && getValue(itId) != INVALID_ID && 	// if current parentId == first id of parentIds (of read entry in readAnnoStore)
110 						    getValue(fragStore.annotationStore, getValue(itId)).parentId == front(allParentIds))
111 						{
112 							if (help) 	// not the first annotation for this read-interval -> ";" sign for overlapping annotations
113                                 readOutput << ';';
114                             readOutput << getValue(fragStore.annotationNameStore, getValue(itId));
115 							help = true;
116 						}
117 						else if (getValue(itId) != INVALID_ID && getValue(fragStore.annotationStore, getValue(itId)).parentId != INVALID_ID &&	// get other parentIds
118 							!isElement_unsorted(getValue(fragStore.annotationStore, getValue(itId)).parentId, allParentIds) ) //?
119 						{
120 							appendValue(allParentIds, getValue(fragStore.annotationStore, getValue(itId)).parentId, Generous() );
121 						}
122 					}
123 					if ( !empty(getValue(itCountStore).parentIds) && position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
124                         readOutput << ':';
125 				}
126 				if (!empty(getValue(itCountStore).parentIds))
127 					readOutput << '\t' << getValue(fragStore.annotationNameStore, front(allParentIds)) << '\t';
128 				// outputs for all other parentIds
129 				itP = begin(allParentIds);
130 				itPEnd = end(allParentIds);
131 				if (!empty(getValue(itCountStore).parentIds)) goNext(itP);
132 				for ( ; itP != itPEnd; goNext(itP))
133 				{
134 					itAnnoIds = begin(getValue(itCountStore).annoIds);
135 					itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
136 					for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
137 					{
138 						invalid = true;				// if no annotation for the current parent in  interval -> UNKOWN_REGION
139 						itId = begin(*itAnnoIds);
140                         itIdEnd = end(*itAnnoIds);
141 						for ( ; itId != itIdEnd; goNext(itId))
142 						{
143 							if (getValue(itId) != INVALID_ID && getValue(fragStore.annotationStore, getValue(itId)).parentId == getValue(itP))
144 							{
145 								if (!invalid)	// not the first annotation for this interval -> ";" sign for overlapping annotations
146                                     readOutput << ';';
147 								readOutput << getValue(fragStore.annotationNameStore, getValue(itId));
148 								invalid = false;
149 							}
150 						}
151 						if (invalid)
152                             readOutput << "UNKNOWN_REGION";
153 						if (position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
154                             readOutput << ':';
155 					}
156                     readOutput << '\t';
157 					if (getValue(itP) != INVALID_ID)
158                         readOutput << getValue(fragStore.annotationNameStore, getValue(itP));
159 					else
160                         readOutput << "NO_PARENT";
161                     readOutput << '\t';
162 				}
163 			}
164 			else  // only INVALID_IDS
165 			{
166                 readOutput << ".\t";
167 
168 				// invalid_ids for each interval
169 				itAnnoIds = begin(getValue(itCountStore).annoIds);
170 				itAnnoIdsEnd = end(getValue(itCountStore).annoIds);
171 				for ( ; itAnnoIds != itAnnoIdsEnd; goNext(itAnnoIds))
172 				{
173                     readOutput << "UNKNOWN_REGION";
174 					if (position(itAnnoIds, getValue(itCountStore).annoIds) != endPosition(getValue(itCountStore).annoIds) - 1)
175                         readOutput << ':';
176 				}
177                 readOutput << "\tUNKNOWN_REGION";
178 			}
179 		}
180         readOutput << "\n";
181 	}
182 }
183 
184 
185 
186 //////////////////////////////////////////////////////////////////////////////
187 //create AnnoCountGFF
188 //////////////////////////////////////////////////////////////////////////////
189 
190 template<typename TFile, typename TAnnoCountStore, typename TAnnoNormStore, typename TSpec, typename TConfig, typename TMap>
191 inline void
createAnnoCountGFF(TFile & annoOutput,TAnnoCountStore & annoCountStore,TAnnoNormStore & annoNormStore,FragmentStore<TSpec,TConfig> & fragStore,TMap & mapO)192 createAnnoCountGFF(TFile & annoOutput, TAnnoCountStore & annoCountStore, TAnnoNormStore &annoNormStore, FragmentStore<TSpec, TConfig> & fragStore, TMap &mapO)
193 {
194 	typedef typename FragmentStore<TSpec, TConfig>::TContigPos 		TContigPos;
195 	typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore 	TAnnotationStore;
196 	typedef typename Value<TAnnotationStore>::Type 				TAnnotationStoreElement;
197 	typedef typename TAnnotationStoreElement::TId 				TId;
198 	typedef typename Iterator<TAnnoCountStore>::Type 			TCountIter;
199 	typedef typename Iterator<TAnnoNormStore>::Type				TNormIter;
200 	typedef typename Iterator<TAnnotationStore>::Type 			TAnnoIter;
201 
202 	static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
203 	static const TContigPos INVALID_POS = TAnnotationStoreElement::INVALID_POS;
204 
205 	TCountIter itCount = begin(annoCountStore);
206 	TCountIter itCountEnd = end(annoCountStore);
207 	TAnnoIter itAnno = begin(fragStore.annotationStore);
208 	TNormIter itNorm = begin(annoNormStore);
209 
210 	for ( ; itCount != itCountEnd; goNext(itCount), goNext(itAnno), goNext(itNorm))
211 	{
212         if (getValue(itAnno).typeId != INVALID_ID)
213             if (fragStore.annotationTypeStore[getValue(itAnno).typeId] == "<root>") continue;
214         // contig-name
215         if (getValue(itAnno).contigId == INVALID_ID )
216             annoOutput << "INVALID_ID\t";
217         else
218             annoOutput << getValue(fragStore.contigNameStore, getValue(itAnno).contigId) << '\t';
219         annoOutput << "Annotation_Count\tregion\t";
220         // startposition endposition orientation .
221         if (getValue(itAnno).beginPos == INVALID_POS)
222 		{
223 			annoOutput << ".\t.\t" << getValue(itCount);
224 			if (getValue(itAnno).parentId == INVALID_ID || (fragStore.annotationStore[getValue(itAnno).parentId].typeId != INVALID_ID && fragStore.annotationTypeStore[fragStore.annotationStore[getValue(itAnno).parentId].typeId] == "<root>"))
225 			{
226 				if (mapValue(mapO, position(itAnno, fragStore.annotationStore)) == 0)
227 					annoOutput << "\t+\t.\t";
228 				else
229 					annoOutput << "\t-\t.\t";
230 			}
231 			else
232             {
233                 annoOutput << "\t.\t.\t";
234             }
235 		}
236 		else
237 		{
238 			if (getValue(itAnno).beginPos <= getValue(itAnno).endPos)
239 				annoOutput << getValue(itAnno).beginPos + 1
240 				           << '\t'
241 				           << getValue(itAnno).endPos
242 				           << '\t'
243 				           << getValue(itCount)
244 				           << "\t+\t.\t";
245 			else
246 				annoOutput << getValue(itAnno).endPos + 1
247 				           << '\t'
248 				           << getValue(itAnno).beginPos
249 				           << '\t'
250 				           << getValue(itCount)
251 				           << "\t-\t.\t";
252 		}
253 		// annotation-name (parent annotation-name)
254 		if (getValue(itAnno).parentId == INVALID_ID || (fragStore.annotationStore[getValue(itAnno).parentId].typeId != INVALID_ID && fragStore.annotationTypeStore[fragStore.annotationStore[getValue(itAnno).parentId].typeId] == "<root>"))
255 			annoOutput << "ID="
256 			           << getValue(fragStore.annotationNameStore, position(itAnno, fragStore.annotationStore))
257 			           << ';';
258 		else
259 			annoOutput << "ID="
260 			           << getValue(fragStore.annotationNameStore, position(itAnno, fragStore.annotationStore))
261 			           << ";ParentID="
262 			           << getValue(fragStore.annotationNameStore, getValue(itAnno).parentId)
263 			           << ';';
264         annoOutput << formattedNumber("%f", *itNorm) << ";\n";
265 	}
266 }
267 
268 
269 //////////////////////////////////////////////////////////////////////////////
270 //create tupleCountGFF
271 //////////////////////////////////////////////////////////////////////////////
272 
273 template<typename TFile, typename TTupleCountStore, typename TSpec, typename TConfig>
274 inline void
createTupleCountGFF(TFile & tupleOutput,TTupleCountStore & tupleCountStore,FragmentStore<TSpec,TConfig> & fragStore,unsigned thresholdCount,double thresholdRPKM)275 createTupleCountGFF(TFile & tupleOutput, TTupleCountStore & tupleCountStore, FragmentStore<TSpec, TConfig> & fragStore, unsigned thresholdCount, double thresholdRPKM)
276 {
277 	typedef typename FragmentStore<TSpec, TConfig>::TAnnotationStore 	TAnnotationStore;
278 	typedef typename Value<TAnnotationStore>::Type 				TAnnotationStoreElement;
279 	typedef typename TAnnotationStoreElement::TId 				TId;
280 
281 	typedef typename Iterator<TTupleCountStore>::Type 			TCountStoreIter;
282 	typedef typename Value<TTupleCountStore>::Type				TTupleCountStoreElement;
283 	typedef typename TTupleCountStoreElement::TTupleList			TTupleList;
284 	typedef typename TTupleCountStoreElement::TTupleCounts			TTupleCounts;
285 	typedef typename TTupleCountStoreElement::TTupleNorm			TTupleNorm;
286 	typedef typename Value<TTupleList>::Type				TTupel;
287 	typedef typename Iterator<TTupleList>::Type				TTupleListIter;
288 	typedef typename Iterator<TTupleCounts>::Type				TCountIter;
289 	typedef typename Iterator<TTupleNorm>::Type				TNormIter;
290 	typedef typename Iterator<TTupel>::Type					TTupelIter;
291 
292 	static const TId INVALID_ID = TAnnotationStoreElement::INVALID_ID;
293 
294 	if (!empty(tupleCountStore))
295 	{
296 		TCountStoreIter itCountStore = begin(tupleCountStore);
297 		TCountStoreIter itCountStoreEnd = end(tupleCountStore);
298 		TAnnotationStoreElement currentElement;
299 		TTupleListIter itT;
300 		TTupleListIter itTEnd;
301 		TCountIter itC;
302 		TNormIter itN;
303 		TTupelIter itId;
304 		TTupelIter itIdEnd;
305 		for ( ; itCountStore != itCountStoreEnd; goNext(itCountStore))
306 		{
307 			currentElement = getValue(fragStore.annotationStore, position(itCountStore, tupleCountStore));
308 
309 			itT = begin(getValue(itCountStore).readConnections);
310 			itTEnd = end(getValue(itCountStore).readConnections);
311 			itC = begin(getValue(itCountStore).readConnectionCounts);
312 			itN = begin(getValue(itCountStore).readConnectionNorm);
313 			// read connections:
314 			for ( ; itT != itTEnd; goNext(itT), goNext(itC), goNext(itN))
315 			{
316 				if (getValue(itC) >= thresholdCount && getValue(itN) >= thresholdRPKM)
317 				{
318 					// contig-name
319 					tupleOutput << getValue(fragStore.contigNameStore, currentElement.contigId) << '\t';
320 					// parent-name
321 					if (currentElement.parentId == INVALID_ID )
322 						tupleOutput << "NO_PARENT\t";
323 					else
324 						tupleOutput << getValue(fragStore.annotationNameStore, currentElement.parentId) << '\t';
325 					// orientation
326 					if ( currentElement.beginPos <= currentElement.endPos )
327 						tupleOutput << "+\t";
328 					else
329 						tupleOutput << "-\t";
330 					// first annotationId of tuple (store implicit)
331 					tupleOutput << getValue(fragStore.annotationNameStore, position(itCountStore, tupleCountStore));
332 					// other annotationIds
333 					itId = begin(getValue(itT));
334 					itIdEnd = end(getValue(itT));
335 					for ( ; itId != itIdEnd; goNext(itId))
336 						tupleOutput << ":" << getValue(fragStore.annotationNameStore, getValue(itId));
337 					tupleOutput << '\t';
338 					// tuple count
339 					tupleOutput << getValue(itC) << '\t';
340 					// normalized tuple count
341                     tupleOutput << formattedNumber("%f", *itN) << '\n';
342 				}
343 			}
344 			//matepair connections:
345 			itT = begin(getValue(itCountStore).matePairConnections);
346 			itTEnd = end(getValue(itCountStore).matePairConnections);
347 			itC = begin(getValue(itCountStore).matePairConnectionCounts);
348 			itN = begin(getValue(itCountStore).matePairConnectionNorm);
349 			for ( ; itT != itTEnd; goNext(itT), goNext(itC), goNext(itN))
350 			{
351 				if (getValue(itC) >= thresholdCount && getValue(itN) >= thresholdRPKM)
352 				{
353 					// contig-name
354 					tupleOutput << getValue(fragStore.contigNameStore, currentElement.contigId) << '\t';
355 					// parent-name
356 					if (currentElement.parentId == INVALID_ID )
357 						tupleOutput << "NO_PARENT\t";
358 					else
359 						tupleOutput << getValue(fragStore.annotationNameStore, currentElement.parentId) << '\t';
360 					// orientation
361 					if ( currentElement.beginPos <= currentElement.endPos )
362 						tupleOutput << "+\t";
363 					else
364 						tupleOutput << "-\t";
365 					// first annotationId of tuple
366 					tupleOutput << getValue(fragStore.annotationNameStore, position(itCountStore, tupleCountStore));
367 					// other annotationIds of first read
368 					itId = begin(getValue(itT));
369 					itIdEnd = end(getValue(itT));
370 					for ( ; itId != itIdEnd && getValue(itId) != INVALID_ID; goNext(itId))
371 						tupleOutput << ":" << getValue(fragStore.annotationNameStore, getValue(itId));
372 					goNext(itId);
373                     tupleOutput << '^';
374 					// annotationIds of second read
375 					tupleOutput << getValue(fragStore.annotationNameStore, getValue(itId));
376 					goNext(itId);
377 					for ( ; itId != itIdEnd; goNext(itId))
378 						tupleOutput << ':' << getValue(fragStore.annotationNameStore, getValue(itId));
379                     tupleOutput << '\t';
380 					// tuple count
381 					tupleOutput << *itC << '\t';
382 					// normalized tuple count
383                     tupleOutput << formattedNumber("%f", *itN) << '\n';
384 				}
385 			}
386 		}
387 	}
388 }
389 
390 
391 //////////////////////////////////////////////////////////////////////////////
392 
393 
394 }// namespace seqan
395 
396 #endif //#ifndef SEQAN_HEADER_...
397