1 /*  $Id: traversal_merger.cpp 257155 2011-03-10 15:28:28Z kornbluh $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Michael Kornbluh
27 *
28 * File Description:
29 *   Tries to merge user functions that are identical.
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include "traversal_merger.hpp"
34 #include <iterator>
35 
36 BEGIN_NCBI_SCOPE
37 
CTraversalMerger(const CTraversalNode::TNodeVec & rootTraversalNodes,const CTraversalNode::TNodeSet & nodesWithFunctions)38 CTraversalMerger::CTraversalMerger(
39         const CTraversalNode::TNodeVec &rootTraversalNodes,
40         const CTraversalNode::TNodeSet &nodesWithFunctions )
41         : m_RootTraversalNodes( rootTraversalNodes.begin(), rootTraversalNodes.end() )
42 {
43     // we do a partial breadth-first search upward from the nodes with functions, merging nodes wherever we can
44     CTraversalNode::TNodeSet currentTier;
45     CTraversalNode::TNodeSet nextTier;
46 
47     // Don't forget to consider cycles.
48     // e.g. A -> B -> C -> A(again), but where
49     // A, B and C can be merged into other nodes D, E and F
50     // that have an identical cycle with the same types and
51     // everything.
52     ITERATE( CTraversalNode::TNodeSet, node_iter, nodesWithFunctions ) {
53         currentTier.insert( *node_iter );
54     }
55     while( ! currentTier.empty() ) {
56         x_DoTier( currentTier, nextTier );
57 
58         // switch to next tier
59         nextTier.swap( currentTier );
60         nextTier.clear();
61     }
62 }
63 
64 template< typename TIter1, typename TIter2, typename TComparator >
s_Lexicographical_compare_arrays(TIter1 begin1,TIter1 end1,TIter2 begin2,TIter2 end2,TComparator comparator)65 static int s_Lexicographical_compare_arrays(
66     TIter1 begin1, TIter1 end1,
67     TIter2 begin2, TIter2 end2,
68     TComparator comparator )
69 {
70     TIter1 iter1 = begin1;
71     TIter2 iter2 = begin2;
72 
73     for( ; iter1 != end1 && iter2 != end2 ; ++iter1, ++iter2 ) {
74         const int comparison = comparator( *iter1, *iter2 );
75         if( comparison != 0 ) {
76             return comparison;
77         }
78     }
79 
80     // shorter sequence first
81     if( iter1 == end1 ) {
82         if( iter2 == end2 ) {
83             return 0;
84         } else {
85             return -1;
86         }
87     } else if( iter2 == end2 ) {
88         return 1;
89     }
90     _ASSERT(false); // should be impossible to reach here
91     return 0;
92 }
93 
94 class CCompareCRefUserCall
95 {
96 public:
operator ()(const CRef<CTraversalNode::CUserCall> & call1,const CRef<CTraversalNode::CUserCall> & call2)97     int operator()(
98         const CRef<CTraversalNode::CUserCall> &call1,
99         const CRef<CTraversalNode::CUserCall> &call2 )
100     {
101         return call1->Compare( *call2 );
102     }
103 };
104 
105 int CTraversalMerger::CNodeLabeler::ms_NumUnmergeableSoFar = 0;
106 
CNodeLabeler(ncbi::CRef<CTraversalNode> node,string & out_result,ERootEncountered & out_root_encountered,const CTraversalNode::TNodeSet & root_nodes)107 CTraversalMerger::CNodeLabeler::CNodeLabeler(
108     ncbi::CRef<CTraversalNode> node, string &out_result,
109     ERootEncountered &out_root_encountered,
110     const CTraversalNode::TNodeSet &root_nodes )
111 : m_PrevNodeInt(0), m_Out_result(out_result),
112     m_Out_root_encountered(out_root_encountered),
113     m_Root_nodes(root_nodes)
114 {
115     // clear args
116     m_Out_result.clear();
117     m_Out_root_encountered = eRootEncountered_No;
118 
119     // assign values to output variables
120     x_AppendNodeLabelRecursively( node );
121 
122     // Before leaving, make sure all dependencies are met,
123     // otherwise we mark the node as unmergeable
124     ITERATE( set< CRef<CTraversalNode> >, dependency_iter, m_DependencyNodes ) {
125         if( m_NodeToIntMap.find(*dependency_iter) == m_NodeToIntMap.end() ) {
126             // There's a dependency outside of the nodes we encountered, so give
127             // this node a unique label so it won't be merged into anything.
128             m_Out_result = "(UNMERGEABLE" + NStr::IntToString(++ms_NumUnmergeableSoFar) + ")";
129             break;
130         }
131     }
132 }
133 
134 void
x_AppendNodeLabelRecursively(CRef<CTraversalNode> node)135 CTraversalMerger::CNodeLabeler::x_AppendNodeLabelRecursively( CRef<CTraversalNode> node )
136 {
137     if( eIsCyclic_NonCyclic == x_AppendDirectNodeLabel( node ) ) {
138         // If non-cyclic call, we're free to traverse the children, if there are any
139         if( ! node->GetCallees().empty() ) {
140             m_Out_result += "{";
141             ITERATE( CTraversalNode::TNodeCallSet, callee_iter, node->GetCallees() ) {
142                 x_AppendNodeLabelRecursively( (*callee_iter)->GetNode() );
143                 m_Out_result += ","; // we don't care if last one has unnecessary comma
144             }
145             m_Out_result += "}";
146         }
147     }
148 }
149 
150 // non-recursive. Just gives the plain label
151 CTraversalMerger::CNodeLabeler::EIsCyclic
x_AppendDirectNodeLabel(CRef<CTraversalNode> node)152 CTraversalMerger::CNodeLabeler::x_AppendDirectNodeLabel( CRef<CTraversalNode> node )
153 {
154     // First check if the node already has an int.
155     // If so, we just return that number as a string
156     TNodeToIntMap::iterator node_location = m_NodeToIntMap.find( node );
157     if( node_location != m_NodeToIntMap.end() ) {
158         m_Out_result += NStr::IntToString( node_location->second );
159         return eIsCyclic_Cyclic;
160     }
161 
162     // Otherwise, we have to create the string ourselves:
163     const int node_int = ++m_PrevNodeInt;
164     m_Out_result += "(" + NStr::IntToString( node_int );
165     m_NodeToIntMap[node] = node_int; // store the int for later use
166 
167     // name does NOT include fields that don't directly affect:
168     // the function's behavior( e.g. funcname, callers, etc.)
169     // The label doesn't include callees, but that's only
170     // because it will be added recursively elsewhere
171 
172     m_Out_result += "[";
173     ITERATE( CTraversalNode::TUserCallVec, pre_user_call_iter, node->GetPreCalleesUserCalls() ) {
174         // we don't care if the last one has an unnecessary comma
175         m_Out_result += (*pre_user_call_iter)->GetUserFuncName() + ",";
176         const CTraversalNode::TNodeVec &node_vec = (*pre_user_call_iter)->GetExtraArgNodes();
177         copy( node_vec.begin(), node_vec.end(),
178             inserter(m_DependencyNodes, m_DependencyNodes.begin()) );
179     }
180     m_Out_result += "][";
181     ITERATE( CTraversalNode::TUserCallVec, post_user_call_iter, node->GetPostCalleesUserCalls() ) {
182         // we don't care if the last one has an unnecessary comma
183         m_Out_result += (*post_user_call_iter)->GetUserFuncName() + ",";
184         const CTraversalNode::TNodeVec &node_vec = (*post_user_call_iter)->GetExtraArgNodes();
185         copy( node_vec.begin(), node_vec.end(),
186             inserter(m_DependencyNodes, m_DependencyNodes.begin()) );
187     }
188     m_Out_result += "]";
189 
190     m_Out_result += node->GetTypeAsString() + ",";
191     m_Out_result += node->GetInputClassName();
192 
193     m_Out_result += ")";
194 
195     // check if the given node is a root node
196     if( m_Root_nodes.find(node) != m_Root_nodes.end() ) {
197         m_Out_root_encountered = eRootEncountered_Yes;
198     }
199 
200     return eIsCyclic_NonCyclic;
201 }
202 
203 void
x_DoTier(const CTraversalNode::TNodeSet & currentTier,CTraversalNode::TNodeSet & nextTier)204 CTraversalMerger::x_DoTier(
205     const CTraversalNode::TNodeSet &currentTier,
206     CTraversalNode::TNodeSet &nextTier )
207 {
208     _ASSERT( nextTier.empty() );
209     // see which nodes in the current tier can be merged together and
210     // set the callers of any merged nodes to be the nextTier, since
211     // they might become mergeable.
212 
213     // In the "pair", the bool is true iff the node calls a
214     // root node somehow (directly or indirectly or *is* a root)
215     typedef pair< bool, CRef<CTraversalNode> > TMergeMapPair;
216     typedef vector<TMergeMapPair> TMergeMapPairVec;
217     // maps a node's label to all the nodes that have that same label
218     // (and are therefore mergeable)
219     typedef map< string, TMergeMapPairVec > TMergeMap;
220 
221     TMergeMap merge_map;
222     ITERATE( CTraversalNode::TNodeSet, node_iter, currentTier ) {
223         string node_label;
224         // A node's label is the same as another nodes iff
225         // they can be merged
226         CNodeLabeler::ERootEncountered root_encountered =
227             CNodeLabeler::eRootEncountered_No;
228         CNodeLabeler( *node_iter, node_label, root_encountered, m_RootTraversalNodes );
229         merge_map[node_label].push_back(
230             TMergeMapPair(
231                 root_encountered != CNodeLabeler::eRootEncountered_No, *node_iter ) );
232     }
233 
234     // for each mergeable set of nodes, merge them and place their callers on
235     // the next tier
236     NON_CONST_ITERATE( TMergeMap, merge_iter, merge_map ) {
237         TMergeMapPairVec &merge_vec = merge_iter->second;
238         if( merge_vec.size() > 1 ) {
239             // merge all nodes into the node with the shortest func name,
240             // or, if there's a root node, into the root node
241             // ( Watch out for the case of multiple root nodes! )
242 
243             TMergeMapPairVec::iterator do_merge_iter =
244                 merge_vec.begin();
245             // length of name, or negative if it's a root node,
246             // since we prefer root nodes
247             int target_badness = kMax_Int;
248             CRef<CTraversalNode> target = do_merge_iter->second;
249             while( do_merge_iter != merge_vec.end() ) {
250                 CRef<CTraversalNode> current_node = do_merge_iter->second;
251                 const bool has_root = do_merge_iter->first;
252                 if( has_root ) {
253                     // if there's already a root, then just remove
254                     // this one from the mergeable list
255                     if( target_badness < 0 ) {
256                         // This might be slow.  Maybe use lists instead?
257                         // ( Performance empirically acceptable for now,
258                         // though )
259                         do_merge_iter = merge_vec.erase( do_merge_iter );
260                         continue;
261                     }
262                     target = current_node;
263                     target_badness = -1; // guarantee that we use the root
264                 } else if( (int)current_node->GetFuncName().length() < target_badness ) {
265                     target = current_node;
266                 }
267                 ++do_merge_iter;
268             }
269 
270             do_merge_iter = merge_vec.begin();
271             for( ; do_merge_iter != merge_vec.end(); ++do_merge_iter ) {
272                 // see if the target itself is a root
273                 // (This is different from "has_root" which checks if
274                 // the node or any of its direct or indirect callees are a root )
275                 const bool target_is_itself_a_root =
276                     ( m_RootTraversalNodes.find(target) != m_RootTraversalNodes.end() );
277                 // "Merge()" will properly detect the case of trying to merge a node into itself
278                 target->Merge( do_merge_iter->second,
279                     ( target_is_itself_a_root ?
280                         CTraversalNode::eMergeNameAllowed_ForbidNameChange :
281                         CTraversalNode::eMergeNameAllowed_AllowNameChange ) );
282             }
283             x_AddCallersToTier( target, nextTier );
284         }
285     }
286 }
287 
x_AddCallersToTier(CRef<CTraversalNode> current_node,CTraversalNode::TNodeSet & tier)288 void CTraversalMerger::x_AddCallersToTier(
289     CRef<CTraversalNode> current_node,
290     CTraversalNode::TNodeSet &tier )
291 {
292     const CTraversalNode::TNodeCallSet &callers = current_node->GetCallers();
293     ITERATE( CTraversalNode::TNodeCallSet, caller_iter, callers ) {
294         tier.insert( (*caller_iter)->GetNode() );
295     }
296 }
297 
298 END_NCBI_SCOPE
299