1 #ifndef SCAFFOLDS_H
2 #define SCAFFOLDS_H
3 /*
4  *  scaffolds.h
5  *  cufflinks
6  *
7  *  Created by Cole Trapnell on 3/30/09.
8  *  Copyright 2009 Cole Trapnell. All rights reserved.
9  *
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15 
16 #include <set>
17 #include <vector>
18 
19 #include "common.h"
20 #include "hits.h"
21 
22 #include <boost/thread.hpp>
23 
24 using namespace std;
25 
26 enum CuffOpCode { CUFF_MATCH, CUFF_INTRON, CUFF_UNKNOWN };
27 
28 struct AugmentedCuffOp
29 {
AugmentedCuffOpAugmentedCuffOp30 	AugmentedCuffOp(const CuffOpCode& O, int g_off, int g_len)
31 	: opcode(O),
32 	genomic_offset(g_off),
33 	genomic_length(g_len)
34 	{
35 		assert (genomic_length >= 0);
36 	}
37 
g_leftAugmentedCuffOp38 	void g_left(int left)
39 	{
40 		int right = genomic_offset + genomic_length;
41 		genomic_offset = left;
42 		genomic_length = right - left;
43 	}
g_leftAugmentedCuffOp44 	int g_left() const { return genomic_offset; }
45 
g_rightAugmentedCuffOp46 	void g_right(int right)
47 	{
48 		genomic_length = right - genomic_offset;
49 	}
g_rightAugmentedCuffOp50 	int g_right() const { return genomic_offset + genomic_length; }
51 
overlap_in_genomeAugmentedCuffOp52 	static bool overlap_in_genome(const AugmentedCuffOp& lhs,
53 								  const AugmentedCuffOp& rhs)
54 	{
55 		if (lhs.g_left() >= rhs.g_left() && lhs.g_left() < rhs.g_right())
56 			return true;
57 		if (lhs.g_right() > rhs.g_left() && lhs.g_right() < rhs.g_right())
58 			return true;
59 		if (rhs.g_left() >= lhs.g_left() && rhs.g_left() < lhs.g_right())
60 			return true;
61 		if (rhs.g_right() > lhs.g_left() && rhs.g_right() < lhs.g_right())
62 			return true;
63 		return false;
64 	}
65 
containsAugmentedCuffOp66 	bool contains(const AugmentedCuffOp& other) const
67 	{
68 		if (g_left() <= other.g_left() && g_right() >= other.g_right())
69 			return true;
70 		return false;
71 	}
72 
properly_containsAugmentedCuffOp73 	bool properly_contains(const AugmentedCuffOp& other) const
74 	{
75 		if ((g_left() < other.g_left() && g_right() >= other.g_right()) ||
76 			(g_left() <= other.g_left() && g_right() > other.g_right()))
77 			return true;
78 		return false;
79 	}
80 
match_lengthAugmentedCuffOp81 	static int match_length(const AugmentedCuffOp& op, int left, int right)
82 	{
83 		int len = 0;
84 		int left_off = op.g_left();
85 		if (op.opcode == CUFF_MATCH)
86 		{
87 			if (left_off + op.genomic_length > left && left_off < right)
88 			{
89 				if (left_off > left)
90 				{
91 					if (left_off + op.genomic_length <= right + 1)
92 						len += op.genomic_length;
93 					else
94 						len += right - left_off;
95 				}
96 				else
97 				{
98 					if (left_off + op.genomic_length <= right + 1)
99 						len += (left_off + op.genomic_length - left);
100 					else
101 						return right - left;
102 				}
103 			}
104 		}
105 		return len;
106 	}
107 
108 	static bool compatible(const AugmentedCuffOp& lhs,
109 						   const AugmentedCuffOp& rhs,
110 						   int overhang_tolerance = bowtie_overhang_tolerance);
111 
112 	bool operator==(const AugmentedCuffOp& rhs) const
113 	{
114 		return (opcode == rhs.opcode &&
115 				genomic_offset == rhs.genomic_offset &&
116 				genomic_length == rhs.genomic_length);
117 	}
118 
119 	bool operator<(const AugmentedCuffOp& rhs) const
120 	{
121 		if (opcode != rhs.opcode)
122 		{
123 			return opcode < rhs.opcode;
124 		}
125 		if (genomic_offset != rhs.genomic_offset)
126 		{
127 			return genomic_offset < rhs.genomic_offset;
128 		}
129 		if (genomic_length != rhs.genomic_length)
130 		{
131 			return genomic_length < rhs.genomic_length;
132 		}
133 		return false;
134 	}
135 
136     static bool g_left_lt(const AugmentedCuffOp& lhs,
137 						  const AugmentedCuffOp& rhs);
138 
139 	bool operator!=(const AugmentedCuffOp& rhs) const
140 	{
141 		return !(*this == rhs);
142 	}
143 
144     static void merge_ops(const std::vector<AugmentedCuffOp>& ops,
145 						  vector<AugmentedCuffOp>& merged,
146 						  bool introns_overwrite_matches,
147                           bool allow_flank_introns = false);
148 
149     static void fill_interstices(vector<AugmentedCuffOp>& to_fill,
150 								 const vector<AugmentedCuffOp>& filler,
151 								 bool allow_flank_fill,
152                                  bool allow_flank_introns);
153 
154 	CuffOpCode opcode;
155 	int genomic_offset;
156 	int genomic_length;
157 };
158 
159 class Scaffold
160 {
cuff_ops_from_cigar(vector<AugmentedCuffOp> & ops,const vector<CigarOp> & cig,int & g_left)161 	void cuff_ops_from_cigar(vector<AugmentedCuffOp>& ops,
162 							 const vector<CigarOp>& cig,
163 							 int& g_left)
164 	{
165 		for (size_t i = 0; i < cig.size(); ++i)
166 		{
167 			assert(cig[i].length >= 0);
168 			switch(cig[i].opcode)
169 			{
170 				case MATCH:
171 					ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, cig[i].length));
172 					g_left += cig[i].length;
173 					break;
174 				case REF_SKIP:
175 					ops.push_back(AugmentedCuffOp(CUFF_INTRON, g_left, cig[i].length));
176 					g_left += cig[i].length;
177 					break;
178 				case SOFT_CLIP:
179 					g_left += cig[i].length;
180 					break;
181                 case HARD_CLIP:
182 					//g_left += cig[i].length;
183 					break;
184                 case INS:
185                     //ops.back().genomic_length -= cig[i].length;
186                     //g_left -= cig[i].length;
187 					break;
188                 case DEL:
189 					if (!ops.empty())
190 						ops.back().genomic_length += cig[i].length;
191                     g_left += cig[i].length;
192 					break;
193 				default:
194 					assert(false);
195 					break;
196 			}
197             assert (ops.empty() || ops.back().genomic_length >= 1);
198 		}
199 	}
200 
201 	RefID _ref_id;
202 
203 public:
204 
Scaffold()205 	Scaffold() :
206 		_ref_id(0),
207 		_is_ref(false),
208 		_strand(CUFF_STRAND_UNKNOWN),
209 		_classcode(0),
210         _fpkm(0.0) {}
211 
Scaffold(const MateHit & mate)212 	Scaffold(const MateHit& mate) :
213 		_ref_id(mate.ref_id()),
214 		_is_ref(false),
215 	    _classcode(0)
216 	{
217 		const ReadHit* left_hit = mate.left_alignment();
218 		//CuffAlign a;
219 		_strand = mate.strand();
220 
221 		vector<AugmentedCuffOp> aug_ops;
222 
223 		if (left_hit)
224 		{
225 			const vector<CigarOp>& l_cig = left_hit->cigar();
226 			int g_left = left_hit->left();
227 			cuff_ops_from_cigar(aug_ops, l_cig, g_left);
228 
229 			const ReadHit* right_hit = mate.right_alignment();
230 			if (right_hit)
231 			{
232 				const vector<CigarOp>& r_cig = right_hit->cigar();
233 				int gap = (right_hit->left() - g_left);
234 
235 				if (gap < 0)
236 				{
237 					g_left += gap;
238 					cuff_ops_from_cigar(aug_ops, r_cig, g_left);
239 				}
240 				else
241 				{
242 					if (gap > 0)
243 					{
244 						//if (gap < (int)min_intron_length)
245 						//	aug_ops.push_back(AugmentedCuffOp(CUFF_MATCH, g_left, gap));
246 						//else
247                         aug_ops.push_back(AugmentedCuffOp(CUFF_UNKNOWN, g_left, gap));
248 						g_left += gap;
249 
250 					}
251 					cuff_ops_from_cigar(aug_ops, r_cig, g_left);
252 				}
253 
254 			}
255 		}
256 		else
257 		{
258 			assert(false);
259 		}
260 		_mates_in_scaff.push_back(&mate);
261 		sort(aug_ops.begin(), aug_ops.end(), AugmentedCuffOp::g_left_lt);
262 
263         for(size_t i = 0; i < aug_ops.size(); ++i)
264         {
265             assert (aug_ops[i].genomic_length >= 1);
266         }
267 
268         AugmentedCuffOp::merge_ops(aug_ops, _augmented_ops, false);
269 
270 		int r_check = left();
271 		for (size_t i = 0; i < _augmented_ops.size(); ++i)
272 			r_check += _augmented_ops[i].genomic_length;
273 
274 #ifdef DEBUG
275 		if (r_check != right())
276 		{
277             AugmentedCuffOp::merge_ops(aug_ops, _augmented_ops, false);
278 		}
279 #endif
280 		assert (r_check == right());
281 
282 		_has_intron = has_intron(*this);
283 
284 		assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN);
285 
286         for(size_t i = 1; i < _augmented_ops.size(); ++i)
287 		{
288 			assert(_augmented_ops[i-1].g_right() == _augmented_ops[i].g_left());
289 		}
290 
291 		assert (_augmented_ops.front().opcode == CUFF_MATCH);
292         assert (_augmented_ops.back().opcode == CUFF_MATCH);
293 
294         if (library_type == "transfrags")
295         {
296             double avg_fpkm = mate.mass();
297             fpkm(avg_fpkm);
298         }
299 	}
300 
301 	Scaffold(const vector<Scaffold>& hits, bool introns_overwrite_matches = true)
_is_ref(false)302 		:  _is_ref(false), _classcode(0)
303 	{
304 		assert (!hits.empty());
305 		_ref_id = hits[0].ref_id();
306 
307 		Scaffold::merge(hits, *this, introns_overwrite_matches);
308 
309 		assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN);
310 
311         assert (_augmented_ops.front().opcode == CUFF_MATCH);
312         assert (_augmented_ops.back().opcode == CUFF_MATCH);
313 
314         if (library_type == "transfrags")
315         {
316             double avg_fpkm = 0.0;
317             BOOST_FOREACH (const Scaffold& sc, hits)
318             {
319                 avg_fpkm += sc.fpkm();
320             }
321             avg_fpkm /= hits.size();
322             fpkm(avg_fpkm);
323         }
324 	}
325 
326 	// For manually constructing scaffolds, for example when a reference is
327 	// available
328 	Scaffold(RefID ref_id, CuffStrand strand, const vector<AugmentedCuffOp>& ops, bool is_ref = false, bool is_pseudo_primary = false)
_ref_id(ref_id)329 	: _ref_id(ref_id),
330 	  _augmented_ops(ops),
331 	  _strand(strand),
332 	  _classcode(0)
333 	{
334 		_has_intron = has_intron(*this);
335 		_is_ref = is_ref;
336         _is_pseudo_primary = is_pseudo_primary;
337 
338 		assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN);
339 
340         assert (_augmented_ops.front().opcode == CUFF_MATCH);
341         assert (_augmented_ops.back().opcode == CUFF_MATCH);
342 	}
343 
344 	//int get_id() const { return _id; }
345 
left()346 	int left() const { return _augmented_ops.front().g_left(); }
right()347 	int right() const  { return _augmented_ops.back().g_right(); }
348 
mate_hits()349 	const vector<const MateHit*>& mate_hits() const { return _mates_in_scaff; }
350 
ref_id()351 	RefID ref_id() const { return _ref_id; }
ref_id(RefID rid)352 	void ref_id(RefID rid) { _ref_id = rid; }
353 
annotated_trans_id()354 	const string& annotated_trans_id() const { return _annotated_trans_id; }
annotated_trans_id(const string & ann_name)355 	void annotated_trans_id(const string& ann_name) { _annotated_trans_id = ann_name; }
356 
annotated_gene_id()357 	const string& annotated_gene_id() const { return _annotated_gene_id; }
annotated_gene_id(const string & ann_name)358 	void annotated_gene_id(const string& ann_name) { _annotated_gene_id = ann_name; }
359 
annotated_gene_name()360 	const string& annotated_gene_name() const { return _annotated_gene_name; }
annotated_gene_name(const string & ann_name)361 	void annotated_gene_name(const string& ann_name) { _annotated_gene_name = ann_name; }
362 
annotated_protein_id()363 	const string& annotated_protein_id() const { return _annotated_protein_id; }
annotated_protein_id(const string & ann_name)364 	void annotated_protein_id(const string& ann_name) { _annotated_protein_id = ann_name; }
365 
annotated_tss_id()366 	const string& annotated_tss_id() const { return _annotated_tss_id; }
annotated_tss_id(const string & ann_name)367 	void annotated_tss_id(const string& ann_name) { _annotated_tss_id = ann_name; }
368 
nearest_ref_id()369 	const string& nearest_ref_id() const { return _nearest_ref_id; }
nearest_ref_id(const string & ann_name)370 	void nearest_ref_id(const string& ann_name) { _nearest_ref_id = ann_name; }
371 
372     // fpkm() and num_fragments() are just used to cache values in scaffolds across
373     // multiple estimation runs (e.g. for use with bias correction
fpkm()374 	double fpkm() const {return _fpkm; }
fpkm(double fpkm)375 	void fpkm(double fpkm) { _fpkm = fpkm; }
376 
num_fragments()377     double num_fragments() const {return _num_fragments; }
num_fragments(double nf)378 	void num_fragments(double nf) { _num_fragments = nf; }
379 
seq()380 	const string& seq() const { return _seq; }
seq(const string & s)381 	void seq(const string& s) {	_seq = s; }
382 
gc_content()383 	double gc_content() const
384 	{
385 		if (_seq != "")
386 		{
387 			int count = 0;
388 			for(size_t i = 0; i < _seq.length(); ++i)
389 			{
390 				if (_seq[i] == 'G' or _seq[i] == 'g' or _seq[i] == 'C' or _seq[i] == 'c')
391 					count ++;
392 			}
393 			return count/double(_seq.length());
394 		}
395 		return -1.0;
396 	}
397 
nearest_ref_classcode()398 	char nearest_ref_classcode() const { return _classcode; }
nearest_ref_classcode(char cc)399 	void nearest_ref_classcode(char cc) { _classcode = cc; }
400 
has_intron()401 	bool has_intron() const { return _has_intron; }
has_suspicious_unknown()402 	bool has_suspicious_unknown() const { return has_suspicious_unknown(*this); }
403 
404     // returns the fraction coverage of internal exons, returns 0 if no internal exons
405 	double internal_exon_coverage() const;
406 
407     // returns true if the scaffold strand is supported with reads or exon overlap with
408     // a reference scaffold of known strand (since the scaffold may have been created with faux reads)
409     bool has_strand_support(vector<boost::shared_ptr<Scaffold> >* ref_scaffs = NULL) const;
410 
411     // returns true if all introns are supported with splice reads, false ow
412     bool hits_support_introns() const;
413     bool hits_support_introns(set<AugmentedCuffOp>& hit_introns) const;
414 
415     // returns true if all internal exons are fully covered and hits support introns, false ow
416     bool has_struct_support(set<AugmentedCuffOp>& hit_introns) const;
417 
is_ref()418 	bool is_ref() const { return _is_ref; }
is_ref(bool ir)419 	void is_ref(bool ir) { _is_ref = ir; }
420 
is_pseudo_primary()421     bool is_pseudo_primary() const { return _is_pseudo_primary; }
is_pseudo_primary(bool ip)422 	void is_pseudo_primary(bool ip) { _is_pseudo_primary = ip; }
423 
strand()424 	CuffStrand strand() const
425     {
426         return _strand;
427     }
428 
strand(CuffStrand strand)429 	void strand(CuffStrand strand)
430     {
431 		assert(!has_strand_support() || _strand != CUFF_STRAND_UNKNOWN);
432         _strand = strand;
433     }
434 
435 	// Could we merge lhs and rhs together?
436 	static bool compatible(const Scaffold& lhs,
437 						   const Scaffold& rhs,
438 						   int overhang_tolerance = bowtie_overhang_tolerance);
439 
440 	static bool strand_agree(const Scaffold& lhs,
441 							 const Scaffold& rhs);
442 
443 	static bool exons_overlap(const Scaffold& lhs,
444 							  const Scaffold& rhs);
445 
446 	// Incorporate Scaffold chow into this one.
447 	static void merge(const Scaffold& lhs,
448 					  const Scaffold& rhs,
449 					  Scaffold& merged,
450 					  bool introns_overwrite_matches);
451 
452 	static void merge(const vector<Scaffold>& s,
453 					  Scaffold& merged,
454 					  bool introns_overwrite_matches);
455 
456 	// Extend 5' end using beginning of other scaffold without adding new exons.
457 	void extend_5(const Scaffold& other);
458 	// Clip final 3' exon by given amount
459 	void trim_3(int to_remove);
460 
461     // Extend final 3' exon by given amount
462     void extend_3(int to_add);
463 
464 	void tile_with_scaffs(vector<Scaffold>& tile_scaffs, int max_len, int tile_offset) const;
465 
466 	// Creates a scaffold that matches this one but only covers the section from g_left for
467 	// a distance of match_length.  It is assumed that this region is contained in the scaffold.
468 	// sub_scaff should be an empty Scaffold object.
469 	bool sub_scaffold(Scaffold& sub_scaff, int g_left, int match_length) const;
470 
471  	// Tests whether the other scaffold is contained allowing the given overhang
472 	bool contains(const Scaffold& other, int ohang_5 = 0, int ohang_3 = 0) const
473 	{
474 		if (left() <= other.left() && right()>= other.right())
475 			return true;
476 
477         if (!(ohang_5 || ohang_3))
478             return false;
479 
480 		int left_hang;
481 		int right_hang;
482 		switch(strand())
483 		{
484 			case CUFF_FWD:
485 				left_hang = ohang_5;
486 				right_hang = ohang_3;
487 				break;
488 			case CUFF_REV:
489 				left_hang = ohang_3;
490 				right_hang = ohang_5;
491 				break;
492 			default:
493 				left_hang = max(ohang_3, ohang_5);
494 				right_hang = left_hang;
495 		}
496 
497 		// Test to see if it is contained within the relaxed boundaries
498 		if ((left()-left_hang) <= other.left() && (right() + right_hang) >= other.right())
499 		{
500 			// Ensure that there are no exons outside of the strict boundaries
501 			return (other.augmented_ops().front().g_right() > left() && other.augmented_ops().back().g_left() < right());
502 		}
503 
504 		return false;
505 
506 	}
507 
508 	// Tests whether the other scaffold contains the 5' end and is contained (allowing some overhang) on the 3' end
509 	// There can be no additional exons on the 5' end
510 	bool overlapped_3(const Scaffold& other, int ohang_5 = 0, int ohang_3 = 0) const
511 	{
512 		switch(strand())
513 		{
514 			case CUFF_FWD:
515 				return((left() + ohang_5 >= other.left() && right() + ohang_3 >= other.right()) && other.augmented_ops().front().g_right() > left());
516 			case CUFF_REV:
517 				return ((right() - ohang_5 <= other.right() && left() - ohang_3 <= other.left()) && other.augmented_ops().back().g_left() < right());
518 			default:
519 				return false;
520 		}
521 	}
522 
523 	int match_length(int left, int right) const;
524 
length()525 	int length() const
526 	{
527 
528 		if(_seq != "")
529 			return _seq.length();
530 
531 		int len = 0;
532 
533 		// FIXME: this estimate really should include estimates of the CUFF_UNKNOWN lengths
534 		// for better abundance estimation.
535 
536 		for (size_t j = 0; j < _augmented_ops.size(); ++j)
537 		{
538 			if (_augmented_ops[j].opcode == CUFF_MATCH)
539 				len += _augmented_ops[j].genomic_length;
540 		}
541 
542 
543 		return len;
544 	}
545 
546 	//void augmented_ops(vector<AugmentedCuffOp>& augmented) const;
547 
548 	// The "score" of a merge is the log odds of lhs and rhs coming from
549 	// different transcripts.  We want to minimize the total score of
550 	// all of our merges.
551 	static double score_merge(const Scaffold& lhs, const Scaffold& rhs);
552 
553 	double score() const;
554 
555 	double worst_mate_score() const;
556 
557 	pair<int,int> genomic_to_transcript_span(pair<int,int> g_span) const;
558 	int genomic_to_transcript_coord(int g_coord) const;
559 	bool map_frag(const MateHit& hit, int& start, int& end, int& frag_len) const;
560 
561 
562 	static bool g_left_lt(const AugmentedCuffOp& lhs,
563 						  const AugmentedCuffOp& rhs);
564 
augmented_ops()565 	const vector<AugmentedCuffOp>& augmented_ops() const { return _augmented_ops; }
augmented_ops(vector<AugmentedCuffOp> aug_ops)566 	void augmented_ops(vector<AugmentedCuffOp> aug_ops) { _augmented_ops = aug_ops; }
567 
568 
569 	static bool overlap_in_genome(const Scaffold& lhs,
570 								  const Scaffold& rhs,
571 								  int overlap_radius);
572 
gaps()573 	vector<pair< int, int> > gaps() const
574 	{
575 		vector<pair<int,int> > g;
576 		const vector<AugmentedCuffOp>& ops = augmented_ops();
577 		for (size_t j = 0; j < ops.size(); ++j)
578 		{
579 			if (ops[j].opcode == CUFF_INTRON)
580 			{
581 				g.push_back(make_pair(ops[j].g_left(), ops[j].g_right()));
582 			}
583 		}
584 		return g;
585 	}
586 
has_unknown()587 	inline bool has_unknown() const
588 	{
589 		const vector<AugmentedCuffOp>& ops = augmented_ops();
590 		for (size_t j = 0; j < ops.size(); ++j)
591 		{
592 			if (ops[j].opcode == CUFF_UNKNOWN)
593 				return true;
594 		}
595 		return false;
596 	}
597 
598     // Fills in CUFF_UNKNOWNs up to filled_gap_size long
599 	void fill_gaps(int filled_gap_size);
600 
601     // Fills in CUFF_UNKNOWNs with the contents of filler.  Filler must be
602     // a sortted, contiguous, non-overlapping vector of AugmentedCuffOps
603     void fill_gaps(const vector<AugmentedCuffOp>& filler);
604 
605 	void clear_hits();
606 	bool add_hit(const MateHit*);
607 
608 	void get_complete_subscaffolds(vector<Scaffold>& complete);
609 private:
610 
611 	void initialize_exon_lists();
612 
has_intron(const Scaffold & scaff)613 	static bool has_intron(const Scaffold& scaff)
614 	{
615 
616 		const vector<AugmentedCuffOp>& ops = scaff.augmented_ops();
617 		for (size_t j = 0; j < ops.size(); ++j)
618 		{
619 			if (ops[j].opcode == CUFF_INTRON)
620 				return true;
621 		}
622 
623 		return false;
624 	}
625 
has_suspicious_unknown(const Scaffold & scaff)626 	static bool has_suspicious_unknown(const Scaffold& scaff)
627 	{
628 
629 		const vector<AugmentedCuffOp>& ops = scaff.augmented_ops();
630 		for (size_t j = 0; j < ops.size(); ++j)
631 		{
632 			if (ops[j].opcode == CUFF_UNKNOWN &&
633 				ops[j].genomic_length > max_frag_len)
634 				return true;
635 		}
636 
637 		return false;
638 	}
639 
640 	static double score_overlap(const AugmentedCuffOp& op,
641 								int inner_left_edge,
642 								int inner_right_edge);
643 
644 	static double score_contigs(const Scaffold& contig,
645 								const vector<pair<int, int> >& inners);
646 
647 	static double min_score(const Scaffold& contig,
648 							const vector<pair<int, int> >& inners);
649 
650 	static bool compatible_contigs(const Scaffold& lhs,
651 											const Scaffold& rhs,
652 											int overhang_tolerance = bowtie_overhang_tolerance);
653 
654 
655 	typedef vector<AugmentedCuffOp> OpList;
656 
657 	bool check_merge_length(const vector<AugmentedCuffOp>& ops);
658 
659 	static void fill_interstices(vector<AugmentedCuffOp>& to_fill,
660 								 const vector<AugmentedCuffOp>& filler,
661 								 bool allow_flank_fill);
662 
663 	static void merge_ops(const std::vector<AugmentedCuffOp>& ops,
664 						  vector<AugmentedCuffOp>& merged,
665 						  bool introns_overwrite_matches);
666 
667 	vector<const MateHit*> _mates_in_scaff;
668 
669 	bool _has_intron;
670 	bool _is_ref;
671     bool _is_pseudo_primary;
672 
673 	vector<AugmentedCuffOp> _augmented_ops;
674 	CuffStrand _strand;
675 
676 	string _annotated_trans_id;
677 	string _annotated_gene_id;
678 	string _annotated_gene_name;
679 	string _annotated_protein_id;
680 	string _annotated_tss_id;
681 	string _nearest_ref_id;
682 	char _classcode;
683 
684 	string _seq;
685 	double _fpkm;
686 	double _num_fragments;
687 };
688 
689 bool scaff_lt(const Scaffold& lhs, const Scaffold& rhs);
690 bool scaff_lt_rt(const Scaffold& lhs, const Scaffold& rhs);
691 bool scaff_lt_rt_oplt(const Scaffold& lhs, const Scaffold& rhs);
692 bool scaff_lt_sp(boost::shared_ptr<Scaffold> lhs, boost::shared_ptr<Scaffold> rhs);
693 bool scaff_lt_rt_sp(boost::shared_ptr<Scaffold> lhs, boost::shared_ptr<Scaffold> rhs);
694 bool scaff_lt_rt_oplt_sp(boost::shared_ptr<Scaffold> lhs, boost::shared_ptr<Scaffold> rhs);
695 
696 
697 bool overlap_in_genome(int ll, int lr, int rl, int rr);
698 
699 struct StructurallyEqualScaffolds
700 {
operatorStructurallyEqualScaffolds701 	bool operator()(boost::shared_ptr<Scaffold> lhs, boost::shared_ptr<Scaffold> rhs)
702 	{
703 		return lhs->ref_id() == rhs->ref_id() &&
704 		lhs->augmented_ops() == rhs->augmented_ops();
705 	}
706 };
707 
708 #endif
709