1 /* $Id: cuBlockIntersector.cpp 356144 2012-03-12 18:16:41Z lanczyck $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Charlie
27 *
28 * File Description:
29 *
30 * to find the intersection of the group of alignments on the same sequence
31 *
32 * ===========================================================================
33 */
34
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuBlockIntersector.hpp>
37
38 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)39 BEGIN_SCOPE(cd_utils)
40
41
42 BlockIntersector::BlockIntersector(int seqLen) : m_seqLen(seqLen), m_totalRows(0),
43 m_firstBm(0), m_aligned(0)
44 {
45 m_aligned = new unsigned[seqLen];
46 for (int i = 0; i < seqLen; i++)
47 m_aligned[i] = 0;
48 }
49
~BlockIntersector()50 BlockIntersector::~BlockIntersector()
51 {
52 delete m_firstBm;
53 if (m_aligned)
54 delete []m_aligned;
55 }
56
addOneAlignment(const BlockModel & bm)57 void BlockIntersector::addOneAlignment(const BlockModel& bm)
58 {
59 if (!m_firstBm)
60 {
61 // m_firstBm = &bm;
62 m_firstBm = new BlockModel(bm); // keep a copy; BlockExtender now passes a local variable
63
64 }
65 m_totalRows++;
66 const std::vector<Block>& blocks = bm.getBlocks();
67 for(unsigned int b = 0; b < blocks.size(); b++)
68 for(int pos = blocks[b].getStart(); pos <= blocks[b].getEnd(); pos++)
69 m_aligned[pos]++;
70 }
71
removeOneAlignment(const BlockModel & bm)72 void BlockIntersector::removeOneAlignment(const BlockModel& bm)
73 {
74 m_totalRows--;
75 const std::vector<Block>& blocks = bm.getBlocks();
76 for(unsigned int b = 0; b < blocks.size(); b++)
77 for(int pos = blocks[b].getStart(); pos <= blocks[b].getEnd(); pos++)
78 if (m_aligned[pos] > 0)
79 m_aligned[pos]--;
80 }
81
82
getIntersectedAlignment(double rowFraction)83 BlockModel* BlockIntersector::getIntersectedAlignment(double rowFraction)
84 {
85 BlockModel* result = new BlockModel(*m_firstBm);
86 if (m_totalRows <= 1)
87 return result;
88 std::vector<Block>& blocks = result->getBlocks();
89 blocks.clear();
90 bool inBlock = false;
91 int start = 0;
92 int blockId = 0;
93 int i = 0;
94
95 if (rowFraction < 0.0 || rowFraction > 1.0) rowFraction = 1.0;
96 double adjustedTotalRows = m_totalRows * rowFraction;
97
98 for (; i < m_seqLen; i++)
99 {
100 if (!inBlock)
101 {
102 if (m_aligned[i] >= adjustedTotalRows)
103 {
104 start = i;
105 inBlock = true;
106 }
107 }
108 else
109 {
110 if (m_aligned[i] < adjustedTotalRows)
111 {
112 inBlock = false;
113 blocks.push_back(Block(start, i - start, blockId));
114 blockId++;
115 }
116 }
117 }
118 if (inBlock) //block goes to the end of the sequence
119 blocks.push_back(Block(start, i - start, blockId));
120 return result;
121 }
122
getIntersectedAlignment(const std::set<int> & forcedBreak,double rowFraction)123 BlockModel* BlockIntersector::getIntersectedAlignment(const std::set<int>& forcedBreak, double rowFraction)
124 {
125 BlockModel* result = new BlockModel(*m_firstBm);
126 if (m_totalRows <= 1)
127 return result;
128 std::vector<Block>& blocks = result->getBlocks();
129 std::set<int>::const_iterator setEnd = forcedBreak.end();
130 blocks.clear();
131 bool inBlock = false;
132 bool forceNewBlock = false;
133 int start = 0;
134 int blockId = 0;
135 int i = 0;
136
137 if (rowFraction <= 0.0 || rowFraction > 1.0) rowFraction = 1.0;
138 double adjustedTotalRows = m_totalRows * rowFraction;
139
140 for (; i < m_seqLen; i++)
141 {
142
143 // previous position was not in a block so forcedBreak is irrelevant
144 if (!inBlock)
145 {
146 if (m_aligned[i] >= adjustedTotalRows)
147 {
148 start = i;
149 inBlock = true;
150 }
151 }
152 else
153 {
154 // was the previous position a forced C-terminus?
155 forceNewBlock = (i > 0 && forcedBreak.find(i - 1) != setEnd);
156
157 // it's a C-termini w/o any forcing
158 if (m_aligned[i] < adjustedTotalRows)
159 {
160 inBlock = false;
161 blocks.push_back(Block(start, i - start, blockId));
162 blockId++;
163 }
164 // need to break this block and immediately start a new one
165 else if (forceNewBlock)
166 {
167 blocks.push_back(Block(start, i - start, blockId));
168 blockId++;
169 start = i;
170 }
171 }
172 }
173 if (inBlock) //block goes to the end of the sequence
174 blocks.push_back(Block(start, i - start, blockId));
175 return result;
176 }
177
178 END_SCOPE(cd_utils)
179 END_NCBI_SCOPE
180