1 /* $Id: traversal_merger.cpp 257155 2011-03-10 15:28:28Z kornbluh $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Michael Kornbluh
27 *
28 * File Description:
29 * Tries to merge user functions that are identical.
30 */
31
32 #include <ncbi_pch.hpp>
33 #include "traversal_merger.hpp"
34 #include <iterator>
35
36 BEGIN_NCBI_SCOPE
37
CTraversalMerger(const CTraversalNode::TNodeVec & rootTraversalNodes,const CTraversalNode::TNodeSet & nodesWithFunctions)38 CTraversalMerger::CTraversalMerger(
39 const CTraversalNode::TNodeVec &rootTraversalNodes,
40 const CTraversalNode::TNodeSet &nodesWithFunctions )
41 : m_RootTraversalNodes( rootTraversalNodes.begin(), rootTraversalNodes.end() )
42 {
43 // we do a partial breadth-first search upward from the nodes with functions, merging nodes wherever we can
44 CTraversalNode::TNodeSet currentTier;
45 CTraversalNode::TNodeSet nextTier;
46
47 // Don't forget to consider cycles.
48 // e.g. A -> B -> C -> A(again), but where
49 // A, B and C can be merged into other nodes D, E and F
50 // that have an identical cycle with the same types and
51 // everything.
52 ITERATE( CTraversalNode::TNodeSet, node_iter, nodesWithFunctions ) {
53 currentTier.insert( *node_iter );
54 }
55 while( ! currentTier.empty() ) {
56 x_DoTier( currentTier, nextTier );
57
58 // switch to next tier
59 nextTier.swap( currentTier );
60 nextTier.clear();
61 }
62 }
63
64 template< typename TIter1, typename TIter2, typename TComparator >
s_Lexicographical_compare_arrays(TIter1 begin1,TIter1 end1,TIter2 begin2,TIter2 end2,TComparator comparator)65 static int s_Lexicographical_compare_arrays(
66 TIter1 begin1, TIter1 end1,
67 TIter2 begin2, TIter2 end2,
68 TComparator comparator )
69 {
70 TIter1 iter1 = begin1;
71 TIter2 iter2 = begin2;
72
73 for( ; iter1 != end1 && iter2 != end2 ; ++iter1, ++iter2 ) {
74 const int comparison = comparator( *iter1, *iter2 );
75 if( comparison != 0 ) {
76 return comparison;
77 }
78 }
79
80 // shorter sequence first
81 if( iter1 == end1 ) {
82 if( iter2 == end2 ) {
83 return 0;
84 } else {
85 return -1;
86 }
87 } else if( iter2 == end2 ) {
88 return 1;
89 }
90 _ASSERT(false); // should be impossible to reach here
91 return 0;
92 }
93
94 class CCompareCRefUserCall
95 {
96 public:
operator ()(const CRef<CTraversalNode::CUserCall> & call1,const CRef<CTraversalNode::CUserCall> & call2)97 int operator()(
98 const CRef<CTraversalNode::CUserCall> &call1,
99 const CRef<CTraversalNode::CUserCall> &call2 )
100 {
101 return call1->Compare( *call2 );
102 }
103 };
104
105 int CTraversalMerger::CNodeLabeler::ms_NumUnmergeableSoFar = 0;
106
CNodeLabeler(ncbi::CRef<CTraversalNode> node,string & out_result,ERootEncountered & out_root_encountered,const CTraversalNode::TNodeSet & root_nodes)107 CTraversalMerger::CNodeLabeler::CNodeLabeler(
108 ncbi::CRef<CTraversalNode> node, string &out_result,
109 ERootEncountered &out_root_encountered,
110 const CTraversalNode::TNodeSet &root_nodes )
111 : m_PrevNodeInt(0), m_Out_result(out_result),
112 m_Out_root_encountered(out_root_encountered),
113 m_Root_nodes(root_nodes)
114 {
115 // clear args
116 m_Out_result.clear();
117 m_Out_root_encountered = eRootEncountered_No;
118
119 // assign values to output variables
120 x_AppendNodeLabelRecursively( node );
121
122 // Before leaving, make sure all dependencies are met,
123 // otherwise we mark the node as unmergeable
124 ITERATE( set< CRef<CTraversalNode> >, dependency_iter, m_DependencyNodes ) {
125 if( m_NodeToIntMap.find(*dependency_iter) == m_NodeToIntMap.end() ) {
126 // There's a dependency outside of the nodes we encountered, so give
127 // this node a unique label so it won't be merged into anything.
128 m_Out_result = "(UNMERGEABLE" + NStr::IntToString(++ms_NumUnmergeableSoFar) + ")";
129 break;
130 }
131 }
132 }
133
134 void
x_AppendNodeLabelRecursively(CRef<CTraversalNode> node)135 CTraversalMerger::CNodeLabeler::x_AppendNodeLabelRecursively( CRef<CTraversalNode> node )
136 {
137 if( eIsCyclic_NonCyclic == x_AppendDirectNodeLabel( node ) ) {
138 // If non-cyclic call, we're free to traverse the children, if there are any
139 if( ! node->GetCallees().empty() ) {
140 m_Out_result += "{";
141 ITERATE( CTraversalNode::TNodeCallSet, callee_iter, node->GetCallees() ) {
142 x_AppendNodeLabelRecursively( (*callee_iter)->GetNode() );
143 m_Out_result += ","; // we don't care if last one has unnecessary comma
144 }
145 m_Out_result += "}";
146 }
147 }
148 }
149
150 // non-recursive. Just gives the plain label
151 CTraversalMerger::CNodeLabeler::EIsCyclic
x_AppendDirectNodeLabel(CRef<CTraversalNode> node)152 CTraversalMerger::CNodeLabeler::x_AppendDirectNodeLabel( CRef<CTraversalNode> node )
153 {
154 // First check if the node already has an int.
155 // If so, we just return that number as a string
156 TNodeToIntMap::iterator node_location = m_NodeToIntMap.find( node );
157 if( node_location != m_NodeToIntMap.end() ) {
158 m_Out_result += NStr::IntToString( node_location->second );
159 return eIsCyclic_Cyclic;
160 }
161
162 // Otherwise, we have to create the string ourselves:
163 const int node_int = ++m_PrevNodeInt;
164 m_Out_result += "(" + NStr::IntToString( node_int );
165 m_NodeToIntMap[node] = node_int; // store the int for later use
166
167 // name does NOT include fields that don't directly affect:
168 // the function's behavior( e.g. funcname, callers, etc.)
169 // The label doesn't include callees, but that's only
170 // because it will be added recursively elsewhere
171
172 m_Out_result += "[";
173 ITERATE( CTraversalNode::TUserCallVec, pre_user_call_iter, node->GetPreCalleesUserCalls() ) {
174 // we don't care if the last one has an unnecessary comma
175 m_Out_result += (*pre_user_call_iter)->GetUserFuncName() + ",";
176 const CTraversalNode::TNodeVec &node_vec = (*pre_user_call_iter)->GetExtraArgNodes();
177 copy( node_vec.begin(), node_vec.end(),
178 inserter(m_DependencyNodes, m_DependencyNodes.begin()) );
179 }
180 m_Out_result += "][";
181 ITERATE( CTraversalNode::TUserCallVec, post_user_call_iter, node->GetPostCalleesUserCalls() ) {
182 // we don't care if the last one has an unnecessary comma
183 m_Out_result += (*post_user_call_iter)->GetUserFuncName() + ",";
184 const CTraversalNode::TNodeVec &node_vec = (*post_user_call_iter)->GetExtraArgNodes();
185 copy( node_vec.begin(), node_vec.end(),
186 inserter(m_DependencyNodes, m_DependencyNodes.begin()) );
187 }
188 m_Out_result += "]";
189
190 m_Out_result += node->GetTypeAsString() + ",";
191 m_Out_result += node->GetInputClassName();
192
193 m_Out_result += ")";
194
195 // check if the given node is a root node
196 if( m_Root_nodes.find(node) != m_Root_nodes.end() ) {
197 m_Out_root_encountered = eRootEncountered_Yes;
198 }
199
200 return eIsCyclic_NonCyclic;
201 }
202
203 void
x_DoTier(const CTraversalNode::TNodeSet & currentTier,CTraversalNode::TNodeSet & nextTier)204 CTraversalMerger::x_DoTier(
205 const CTraversalNode::TNodeSet ¤tTier,
206 CTraversalNode::TNodeSet &nextTier )
207 {
208 _ASSERT( nextTier.empty() );
209 // see which nodes in the current tier can be merged together and
210 // set the callers of any merged nodes to be the nextTier, since
211 // they might become mergeable.
212
213 // In the "pair", the bool is true iff the node calls a
214 // root node somehow (directly or indirectly or *is* a root)
215 typedef pair< bool, CRef<CTraversalNode> > TMergeMapPair;
216 typedef vector<TMergeMapPair> TMergeMapPairVec;
217 // maps a node's label to all the nodes that have that same label
218 // (and are therefore mergeable)
219 typedef map< string, TMergeMapPairVec > TMergeMap;
220
221 TMergeMap merge_map;
222 ITERATE( CTraversalNode::TNodeSet, node_iter, currentTier ) {
223 string node_label;
224 // A node's label is the same as another nodes iff
225 // they can be merged
226 CNodeLabeler::ERootEncountered root_encountered =
227 CNodeLabeler::eRootEncountered_No;
228 CNodeLabeler( *node_iter, node_label, root_encountered, m_RootTraversalNodes );
229 merge_map[node_label].push_back(
230 TMergeMapPair(
231 root_encountered != CNodeLabeler::eRootEncountered_No, *node_iter ) );
232 }
233
234 // for each mergeable set of nodes, merge them and place their callers on
235 // the next tier
236 NON_CONST_ITERATE( TMergeMap, merge_iter, merge_map ) {
237 TMergeMapPairVec &merge_vec = merge_iter->second;
238 if( merge_vec.size() > 1 ) {
239 // merge all nodes into the node with the shortest func name,
240 // or, if there's a root node, into the root node
241 // ( Watch out for the case of multiple root nodes! )
242
243 TMergeMapPairVec::iterator do_merge_iter =
244 merge_vec.begin();
245 // length of name, or negative if it's a root node,
246 // since we prefer root nodes
247 int target_badness = kMax_Int;
248 CRef<CTraversalNode> target = do_merge_iter->second;
249 while( do_merge_iter != merge_vec.end() ) {
250 CRef<CTraversalNode> current_node = do_merge_iter->second;
251 const bool has_root = do_merge_iter->first;
252 if( has_root ) {
253 // if there's already a root, then just remove
254 // this one from the mergeable list
255 if( target_badness < 0 ) {
256 // This might be slow. Maybe use lists instead?
257 // ( Performance empirically acceptable for now,
258 // though )
259 do_merge_iter = merge_vec.erase( do_merge_iter );
260 continue;
261 }
262 target = current_node;
263 target_badness = -1; // guarantee that we use the root
264 } else if( (int)current_node->GetFuncName().length() < target_badness ) {
265 target = current_node;
266 }
267 ++do_merge_iter;
268 }
269
270 do_merge_iter = merge_vec.begin();
271 for( ; do_merge_iter != merge_vec.end(); ++do_merge_iter ) {
272 // see if the target itself is a root
273 // (This is different from "has_root" which checks if
274 // the node or any of its direct or indirect callees are a root )
275 const bool target_is_itself_a_root =
276 ( m_RootTraversalNodes.find(target) != m_RootTraversalNodes.end() );
277 // "Merge()" will properly detect the case of trying to merge a node into itself
278 target->Merge( do_merge_iter->second,
279 ( target_is_itself_a_root ?
280 CTraversalNode::eMergeNameAllowed_ForbidNameChange :
281 CTraversalNode::eMergeNameAllowed_AllowNameChange ) );
282 }
283 x_AddCallersToTier( target, nextTier );
284 }
285 }
286 }
287
x_AddCallersToTier(CRef<CTraversalNode> current_node,CTraversalNode::TNodeSet & tier)288 void CTraversalMerger::x_AddCallersToTier(
289 CRef<CTraversalNode> current_node,
290 CTraversalNode::TNodeSet &tier )
291 {
292 const CTraversalNode::TNodeCallSet &callers = current_node->GetCallers();
293 ITERATE( CTraversalNode::TNodeCallSet, caller_iter, callers ) {
294 tier.insert( (*caller_iter)->GetNode() );
295 }
296 }
297
298 END_NCBI_SCOPE
299