1 ////////////////////////////////////////////////////////////////////////////////
2 //! \file
3 //! \author Adam M Phillippy
4 //! \date 03/26/2003
5 //!
6 //! \brief Class declarations for the handling of delta alignment files
7 //!
8 //! \see delta.cc
9 ////////////////////////////////////////////////////////////////////////////////
10
11 #ifndef __DELTA_HH
12 #define __DELTA_HH
13
14 #include "tigrinc.hh"
15 #include <cassert>
16 #include <string>
17 #include <vector>
18 #include <fstream>
19 #include <cstdlib>
20 #include <iostream>
21 #include <map>
22
23
24 const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string
25 const std::string PROMER_STRING = "PROMER"; //!< promer id string
26
27 typedef char AlignmentType_t; //!< type of alignment data
28 const AlignmentType_t NULL_DATA = 0; //!< unknown alignment data type
29 const AlignmentType_t NUCMER_DATA = 'N'; //!< nucmer alignment data
30 const AlignmentType_t PROMER_DATA = 'P'; //!< promer alignment data
31
32 typedef unsigned char Dir_t; //!< directional type
33 const Dir_t FORWARD_DIR = 0; //!< forward direction
34 const Dir_t REVERSE_DIR = 1; //!< reverse direction
35
36
37
38 //===================================================== DeltaAlignment_t =======
39 struct DeltaAlignment_t
40 //!< A single delta encoded alignment region
41 {
42 long sR; //!< start coordinate in the reference
43 long eR; //!< end coordinate in the reference
44 long sQ; //!< start coordinate in the reference
45 long eQ; //!< end coordinate in the reference
46 long idyc; //!< number of mismatches in the alignment
47 long simc; //!< number of similarity scores < 1 in the alignment
48 long stpc; //!< number of stop codons in the alignment
49
50 float idy; //!< percent identity [0 - 100]
51 float sim; //!< percent similarity [0 - 100]
52 float stp; //!< percent stop codon [0 - 100]
53
54 std::vector<long> deltas; //!< delta encoded alignment informaiton
55
DeltaAlignment_tDeltaAlignment_t56 DeltaAlignment_t ( )
57 {
58 clear ( );
59 }
60
clearDeltaAlignment_t61 void clear ( )
62 {
63 sR = eR = sQ = eQ = 0;
64 idy = sim = stp = 0;
65 deltas.clear ( );
66 }
67
68 // Read one alignment
read_nucmerDeltaAlignment_t69 inline bool read_nucmer(std::istream& is, const bool read_deltas = true) { return read(is, false, read_deltas); }
read_promerDeltaAlignment_t70 inline bool read_promer(std::istream& is, const bool read_deltas = true) { return read(is, true, read_deltas); }
71 bool read(std::istream& is, const bool promer, const bool read_deltas = true);
72 };
operator >>(std::istream & is,DeltaAlignment_t & a)73 inline std::istream& operator>>(std::istream& is, DeltaAlignment_t& a) {
74 a.read_nucmer(is);
75 return is;
76 }
operator <<(std::ostream & os,const DeltaAlignment_t & a)77 inline std::ostream& operator<<(std::ostream& os, const DeltaAlignment_t& a) {
78 os << a.sR << ' ' << a.eR << ' ' << a.sQ << ' ' << a.eQ << ' ' << a.idyc << ' ' << a.simc << ' ' << a.stpc << '\n';
79 for(auto d : a.deltas)
80 os << d << '\n';
81 return os;
82 }
83
84
85 //===================================================== DeltaRecord_t ==========
86 struct DeltaRecord_t
87 //!< A delta record representing the alignments between two sequences
88 {
89 std::string idR; //!< reference contig ID
90 std::string idQ; //!< query contig ID
91 long lenR; //!< length of the reference contig
92 long lenQ; //!< length of the query contig
93
94 std::vector<DeltaAlignment_t> aligns; //!< alignments between the two seqs
95
DeltaRecord_tDeltaRecord_t96 DeltaRecord_t ( )
97 {
98 clear ( );
99 }
100
clearDeltaRecord_t101 void clear ( )
102 {
103 idR.erase ( );
104 idQ.erase ( );
105 lenR = lenQ = 0;
106 aligns.clear ( );
107 }
108
109 bool read(std::istream& is);
110 };
operator >>(std::istream & is,DeltaRecord_t & r)111 inline std::istream& operator>>(std::istream& is, DeltaRecord_t& r) {
112 r.read(is);
113 return is;
114 }
115
operator <<(std::ostream & os,const DeltaRecord_t & r)116 inline std::ostream& operator<<(std::ostream& os, const DeltaRecord_t& r) {
117 return os << '>' << r.idR << ' ' << r.idQ << ' ' << r.lenR << ' ' << r.lenQ;
118 }
119
120 //===================================================== DeltaReader_t ==========
121 //! \brief Delta encoded file reader class
122 //!
123 //! Handles the input of delta encoded alignment information for various MUMmer
124 //! utilities. Very basic functionality, can be expanded as necessary...
125 //!
126 //! \see DeltaRecord_t
127 //==============================================================================
128 class DeltaReader_t {
129
130 private:
131
132 std::string delta_path_m; //!< the name of the delta input file
133 std::ifstream delta_stream_m; //!< the delta file input stream
134 std::string data_type_m; //!< the type of alignment data
135 std::string reference_path_m; //!< the name of the reference file
136 std::string query_path_m; //!< the name of the query file
137 DeltaRecord_t record_m; //!< the current delta information record
138 bool is_record_m; //!< there is a valid record in record_m
139 bool is_open_m; //!< delta stream is open
140
141
142 //--------------------------------------------------- readNextAlignment ------
143 //! \brief Reads in a delta encoded alignment
144 //!
145 //! \param align read info into this structure
146 //! \param read_deltas read delta information yes/no
147 //! \pre delta_stream is positioned at the beginning of an alignment header
148 //! \return void
149 //!
150 void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas);
151
152
153 //--------------------------------------------------- readNextRecord ---------
154 //! \brief Reads in the next delta record from the delta file
155 //!
156 //! \param read_deltas read delta information yes/no
157 //! \pre delta file must be open
158 //! \return true on success, false on EOF
159 //!
160 bool readNextRecord (const bool read_deltas);
161
162
163 //--------------------------------------------------- checkStream ------------
164 //! \brief Check stream status and abort program if an error has occured
165 //!
166 //! \return void
167 //!
checkStream()168 void checkStream ( )
169 {
170 if ( !delta_stream_m.good ( ) )
171 {
172 std::cerr << "ERROR: Could not parse delta file, "
173 << delta_path_m << std::endl;
174
175
176 std::cerr << "error no: "
177 << int(delta_stream_m.rdstate() & std::ifstream::failbit)
178 << int(delta_stream_m.rdstate() & std::ifstream::badbit)
179 << int(delta_stream_m.rdstate() & std::ifstream::eofbit)
180 << std::endl;
181 exit (-1);
182 }
183 }
184
185
186 public:
187 //--------------------------------------------------- DeltaReader_t ----------
188 //! \brief Default constructor
189 //!
190 //! \return void
191 //!
DeltaReader_t()192 DeltaReader_t ( )
193 {
194 is_record_m = false;
195 is_open_m = false;
196 }
197
198
199 //--------------------------------------------------- ~DeltaReader_t ---------
200 //! \brief Default destructor
201 //!
202 //! \return void
203 //!
~DeltaReader_t()204 ~DeltaReader_t ( )
205 {
206 close ( );
207 }
208
209
210 //--------------------------------------------------- open -------------------
211 //! \brief Opens a delta file by path
212 //!
213 //! \param delta file to open
214 //! \return void
215 //!
216 void open (const std::string & delta_path);
217
218
219 //--------------------------------------------------- close ------------------
220 //! \brief Closes any open delta file and resets class members
221 //!
222 //! \return void
223 //!
close()224 void close ( )
225 {
226 delta_path_m.erase ( );
227 delta_stream_m.close ( );
228 data_type_m.erase ( );
229 reference_path_m.erase ( );
230 query_path_m.erase ( );
231 record_m.clear ( );
232 is_record_m = false;
233 is_open_m = false;
234 }
235
236
237 //--------------------------------------------------- readNext --------------
238 //! \brief Reads in the next delta record from the delta file
239 //!
240 //! \param read_deltas read delta information yes/no
241 //! \pre delta file must be open
242 //! \return true on success, false on EOF
243 //!
readNext(bool getdeltas=true)244 inline bool readNext (bool getdeltas = true)
245 {
246 return readNextRecord (getdeltas);
247 }
248
249
250
251 //--------------------------------------------------- readNextHeadersOnly ----
252 //! \brief Reads in the next delta record from the delta file
253 //!
254 //! Only reads the alignment header information, does not read in the delta
255 //! information and leaves each alignment structure's delta vector empty.
256 //!
257 //! \pre delta file must be open
258 //! \return true on success, false on EOF
259 //!
readNextHeadersOnly()260 inline bool readNextHeadersOnly ( )
261 {
262 return readNextRecord (false);
263 }
264
265
266 //--------------------------------------------------- getRecord --------------
267 //! \brief Returns a reference to the current delta record
268 //!
269 //! \pre readNext( ) was successfully called in advance
270 //! \return true on success, false on failure or end of file
271 //!
getRecord() const272 const DeltaRecord_t & getRecord ( ) const
273 {
274 assert (is_record_m);
275 return record_m;
276 }
277
278
279 //--------------------------------------------------- getDeltaPath -----------
280 //! \brief Get the path of the current delta file
281 //!
282 //! \pre delta file is open
283 //! \return the path of the delta file
284 //!
getDeltaPath() const285 const std::string & getDeltaPath ( ) const
286 {
287 assert (is_open_m);
288 return delta_path_m;
289 }
290
291
292 //--------------------------------------------------- getDataType ------------
293 //! \brief Get the data type of the current delta file
294 //!
295 //! \pre delta file is open
296 //! \return the data type of the current file
297 //!
getDataType() const298 const std::string & getDataType ( ) const
299 {
300 assert (is_open_m);
301 return data_type_m;
302 }
303
304
305 //--------------------------------------------------- getReferencePath -------
306 //! \brief Get the path of the MUMmer reference file
307 //!
308 //! \pre delta file is open
309 //! \return the path of the MUMmer reference file
310 //!
getReferencePath() const311 const std::string & getReferencePath ( ) const
312 {
313 assert (is_open_m);
314 return reference_path_m;
315 }
316
317
318 //--------------------------------------------------- getQueryPath -----------
319 //! \brief Get the path of the MUMmer query file
320 //!
321 //! \pre delta file is open
322 //! \return the path of the MUMmer query file
323 //!
getQueryPath() const324 const std::string & getQueryPath ( ) const
325 {
326 assert (is_open_m);
327 return query_path_m;
328 }
329 };
330
331
332
333 //===================================================== SNP_t ==================
334 struct DeltaEdgelet_t;
335 struct DeltaEdge_t;
336 struct DeltaNode_t;
337 struct SNP_t
338 //!< A single nuc/aa poly
339 {
340 long buff;
341 char cQ, cR;
342 long pQ, pR;
343 int conQ, conR;
344 std::string ctxQ, ctxR;
345 DeltaEdgelet_t * lp;
346 DeltaEdge_t * ep;
347
SNP_tSNP_t348 SNP_t ( )
349 {
350 cQ = cR = 0;
351 buff = pQ = pR = 0;
352 conQ = conR = 0;
353 };
354 };
355
356
357
358 //===================================================== DeltaEdgelet_t =========
359 struct DeltaEdgelet_t
360 //!< A piece of a delta graph edge, a single alignment
361 {
362 unsigned char isGOOD : 1; //!< meets the requirements
363 unsigned char isQLIS : 1; //!< is part of the query's LIS
364 unsigned char isRLIS : 1; //!< is part of the reference's LIS
365 unsigned char isGLIS : 1; //!< is part of the reference/query LIS
366 unsigned char dirR : 1; //!< reference match direction
367 unsigned char dirQ : 1; //!< query match direction
368
369 DeltaEdge_t * edge;
370 float idy, sim, stp; //!< percent identity [0 - 1]
371 long idyc, simc, stpc; //!< idy, sim, stp counts
372 long loQ, hiQ, loR, hiR; //!< alignment bounds
373 int frmQ, frmR; //!< reading frame
374
375 std::string delta; //!< delta information
376 std::vector<SNP_t *> snps; //!< snps for this edgelet
377
DeltaEdgelet_tDeltaEdgelet_t378 DeltaEdgelet_t ( )
379 {
380 edge = NULL;
381 isGOOD = true;
382 isQLIS = isRLIS = isGLIS = false;
383 dirR = dirQ = FORWARD_DIR;
384 idy = sim = stp = 0;
385 idyc = simc = stpc = 0;
386 loQ = hiQ = loR = hiR = 0;
387 frmQ = frmR = 1;
388 }
389
~DeltaEdgelet_tDeltaEdgelet_t390 ~DeltaEdgelet_t ( )
391 {
392 std::vector<SNP_t *>::iterator i;
393 for ( i = snps . begin( ); i != snps . end( ); ++ i )
394 delete (*i);
395 }
396
isNegativeDeltaEdgelet_t397 bool isNegative() const
398 { return ( dirR != dirQ ); }
399
isPositiveDeltaEdgelet_t400 bool isPositive() const
401 { return ( dirR == dirQ ); }
402
slopeDeltaEdgelet_t403 int slope() const
404 { return ( dirR == dirQ ? +1 : -1 ); }
405
loR2QDeltaEdgelet_t406 long loR2Q() const
407 { return ( isPositive() ? loQ : hiQ ); }
408
hiR2QDeltaEdgelet_t409 long hiR2Q() const
410 { return ( isPositive() ? hiQ : loQ ); }
411
loQ2RDeltaEdgelet_t412 long loQ2R() const
413 { return ( isPositive() ? loR : hiR ); }
414
hiQ2RDeltaEdgelet_t415 long hiQ2R() const
416 { return ( isPositive() ? hiR : loR ); }
417 };
418
419
420
421 //===================================================== DeltaEdge_t ============
422 struct DeltaEdge_t
423 //!< A delta graph edge, alignments between a single reference and query
424 {
425 DeltaNode_t * refnode; //!< the adjacent reference node
426 DeltaNode_t * qrynode; //!< the adjacent query node
427 std::vector<DeltaEdgelet_t *> edgelets; //!< the set of individual alignments
428
DeltaEdge_tDeltaEdge_t429 DeltaEdge_t ( )
430 { refnode = qrynode = NULL; }
431
~DeltaEdge_tDeltaEdge_t432 ~DeltaEdge_t ( )
433 {
434 std::vector<DeltaEdgelet_t *>::iterator i;
435 for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i )
436 delete (*i);
437 }
438
439 void build (const DeltaRecord_t & rec);
440 };
441
442
443 //===================================================== DeltaNode_t ============
444 struct DeltaNode_t
445 //!< A delta graph node, contains the sequence information
446 {
447 const std::string * id; //!< the id of the sequence
448 char * seq; //!< the DNA sequence
449 long len; //!< the length of the sequence
450 std::vector<DeltaEdge_t *> edges; //!< the set of related edges
451
DeltaNode_tDeltaNode_t452 DeltaNode_t ( )
453 { id = NULL; seq = NULL; len = 0; }
454
~DeltaNode_tDeltaNode_t455 ~DeltaNode_t ( )
456 { free (seq); } // DeltaGraph_t will take care of destructing the edges
457 };
458
459
460
461 //===================================================== DeltaGraph_t ===========
462 //! \brief A graph of sequences (nodes) and their alignments (edges)
463 //!
464 //! A bipartite graph with two partite sets, R and Q, where R is the set of
465 //! reference sequences and Q is the set of query sequences. These nodes are
466 //! named "DeltaNode_t". We connect a node in R to a node in Q if an alignment
467 //! is present between the two sequences. The group of all alignments between
468 //! the two is named "DeltaEdge_t" and a single alignment between the two is
469 //! named a "DeltaEdgelet_t". Alignment coordinates reference the forward
470 //! strand and are stored lo before hi.
471 //!
472 //==============================================================================
473 class DeltaGraph_t
474 {
475 public:
476
477 std::map<std::string, DeltaNode_t> refnodes;
478 //!< the reference graph nodes, 1 per aligned sequence
479
480 std::map<std::string, DeltaNode_t> qrynodes;
481 //!< the query graph nodes, 1 per aligned sequence
482
483 std::string refpath; //!< path of the reference FastA file
484 std::string qrypath; //!< path of the query FastA file
485 AlignmentType_t datatype; //!< alignment data type
486
DeltaGraph_t()487 DeltaGraph_t()
488 { datatype = NULL_DATA; }
489
~DeltaGraph_t()490 ~DeltaGraph_t ( )
491 { clear(); }
492
493 void build(const std::string & deltapath, bool getdeltas = true);
494 void clean();
495 void clear();
496 long getNodeCount();
497 long getEdgeCount();
498 long getEdgeletCount();
499
500 void flagGOOD();
501 void flag1to1(float epsilon = -1, float maxolap = 100.0);
502 void flagMtoM(float epsilon = -1, float maxolap = 100.0);
503 void flagGLIS(float epsilon = -1);
504 void flagQLIS(float epsilon = -1,
505 float maxolap = 100.0,
506 bool flagBad = true);
507 void flagRLIS(float epsilon = -1,
508 float maxolap = 100.0,
509 bool flagbad = true);
510 void flagScore(long minlen, float minidy);
511 void flagUNIQ(float minuniq);
512
513 void loadSequences();
514 std::ostream & outputDelta(std::ostream & out);
515 };
516
517 #endif // #ifndef __DELTA_HH
518