1 /* $Id: cuBlockIntersector.cpp 356144 2012-03-12 18:16:41Z lanczyck $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Charlie
27  *
28  * File Description:
29  *
30  *       to find the intersection of the group of alignments on the same sequence
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuBlockIntersector.hpp>
37 
38 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)39 BEGIN_SCOPE(cd_utils)
40 
41 
42 BlockIntersector::BlockIntersector(int seqLen) : m_seqLen(seqLen), m_totalRows(0),
43 	m_firstBm(0), m_aligned(0)
44 {
45 	m_aligned = new unsigned[seqLen];
46 	for (int i = 0; i < seqLen; i++)
47 		m_aligned[i] = 0;
48 }
49 
~BlockIntersector()50 BlockIntersector::~BlockIntersector()
51 {
52     delete m_firstBm;
53 	if (m_aligned)
54 		delete []m_aligned;
55 }
56 
addOneAlignment(const BlockModel & bm)57 void BlockIntersector::addOneAlignment(const BlockModel& bm)
58 {
59 	if (!m_firstBm)
60 	{
61 //		m_firstBm = &bm;
62         m_firstBm = new BlockModel(bm);  //  keep a copy; BlockExtender now passes a local variable
63 
64 	}
65 	m_totalRows++;
66 	const std::vector<Block>& blocks = bm.getBlocks();
67 	for(unsigned int b = 0; b < blocks.size(); b++)
68 		for(int pos = blocks[b].getStart(); pos <= blocks[b].getEnd(); pos++)
69 			m_aligned[pos]++;
70 }
71 
removeOneAlignment(const BlockModel & bm)72 void BlockIntersector::removeOneAlignment(const BlockModel& bm)
73 {
74 	m_totalRows--;
75 	const std::vector<Block>& blocks = bm.getBlocks();
76 	for(unsigned int b = 0; b < blocks.size(); b++)
77 		for(int pos = blocks[b].getStart(); pos <= blocks[b].getEnd(); pos++)
78 			if (m_aligned[pos] > 0)
79 				m_aligned[pos]--;
80 }
81 
82 
getIntersectedAlignment(double rowFraction)83 BlockModel* BlockIntersector::getIntersectedAlignment(double rowFraction)
84 {
85 	BlockModel* result = new BlockModel(*m_firstBm);
86 	if (m_totalRows <= 1)
87 		return result;
88 	std::vector<Block>& blocks = result->getBlocks();
89 	blocks.clear();
90 	bool inBlock = false;
91 	int start = 0;
92 	int blockId = 0;
93 	int i = 0;
94 
95     if (rowFraction < 0.0 || rowFraction > 1.0) rowFraction = 1.0;
96     double adjustedTotalRows = m_totalRows * rowFraction;
97 
98 	for (; i < m_seqLen; i++)
99 	{
100 		if (!inBlock)
101 		{
102 			if (m_aligned[i] >= adjustedTotalRows)
103 			{
104 				start = i;
105 				inBlock = true;
106 			}
107 		}
108 		else
109 		{
110 			if (m_aligned[i] < adjustedTotalRows)
111 			{
112 				inBlock = false;
113 				blocks.push_back(Block(start, i - start, blockId));
114 				blockId++;
115 			}
116 		}
117 	}
118 	if (inBlock) //block goes to the end of the sequence
119 		blocks.push_back(Block(start, i - start, blockId));
120 	return result;
121 }
122 
getIntersectedAlignment(const std::set<int> & forcedBreak,double rowFraction)123 BlockModel* BlockIntersector::getIntersectedAlignment(const std::set<int>& forcedBreak, double rowFraction)
124 {
125 	BlockModel* result = new BlockModel(*m_firstBm);
126 	if (m_totalRows <= 1)
127 		return result;
128 	std::vector<Block>& blocks = result->getBlocks();
129     std::set<int>::const_iterator setEnd = forcedBreak.end();
130 	blocks.clear();
131 	bool inBlock = false;
132     bool forceNewBlock = false;
133 	int start = 0;
134 	int blockId = 0;
135 	int i = 0;
136 
137     if (rowFraction <= 0.0 || rowFraction > 1.0) rowFraction = 1.0;
138     double adjustedTotalRows = m_totalRows * rowFraction;
139 
140 	for (; i < m_seqLen; i++)
141 	{
142 
143         //  previous position was not in a block so forcedBreak is irrelevant
144 		if (!inBlock)
145 		{
146 			if (m_aligned[i] >= adjustedTotalRows)
147 			{
148 				start = i;
149 				inBlock = true;
150 			}
151 		}
152 		else
153 		{
154             //  was the previous position a forced C-terminus?
155             forceNewBlock = (i > 0 && forcedBreak.find(i - 1) != setEnd);
156 
157             //  it's a C-termini w/o any forcing
158             if (m_aligned[i] < adjustedTotalRows)
159 			{
160 				inBlock = false;
161 				blocks.push_back(Block(start, i - start, blockId));
162 				blockId++;
163 			}
164             //  need to break this block and immediately start a new one
165             else if (forceNewBlock)
166             {
167 				blocks.push_back(Block(start, i - start, blockId));
168 				blockId++;
169                 start = i;
170             }
171 		}
172 	}
173 	if (inBlock) //block goes to the end of the sequence
174 		blocks.push_back(Block(start, i - start, blockId));
175 	return result;
176 }
177 
178 END_SCOPE(cd_utils)
179 END_NCBI_SCOPE
180