1#!/usr/bin/perl
2
3use strict 'vars';
4use File::Spec;
5
6my $traceLevel = 3;
7
8# whether to box the clusters by sub-folder, but always color nodes regardless
9my @clusterlist = qw(
10   /xml
11   /export
12   /menus
13   /effects/VST
14   /effects/ladspa
15   /effects/lv2
16   /effects/nyquist
17   /effects/vamp
18);
19my %clusters;
20@clusters{@clusterlist} = ();
21sub clustering
22{
23   return exists( $clusters{ $_[0] } );
24}
25
26# whether to prune redundant arcs implied in transitive closure
27my $pruning = 1;
28
29# whether to insert hyperlinks
30my $links = 1;
31
32# Step 1: collect short names and paths to .cpp files
33# We assume that final path components uniquely identify the files!
34my $dir = "../src";
35
36my %names; # string to string
37{
38   foreach my $file (`find $dir -name '*.cpp' -o -name '*.h' -o -name '*.mm'`) {
39      my $short = $file;
40      chop $short;
41      $short =~ s|\.cpp$||;
42      $short =~ s|\.h$||;
43      $short =~ s|\.mm$||;
44      my $shorter = ($short =~ s|^.*/||r);
45      $names{$shorter} = $short;
46   }
47}
48
49#my $linkroot = "https://github.com/audacity/audacity/tree/master/src";
50my $linkroot = "file://" . File::Spec->rel2abs( $dir );
51
52
53print STDERR "Found ", scalar( keys %names ), " filename(s)\n" if $traceLevel >= 1;
54
55# Step 2: collect inclusions in each .cpp/.h pair, and folder information,
56# and build a graph
57my $arcs = 0;
58my %graph; # hash from names to sets of names
59my $grepcmd = "grep '^ *# *include[^\"]*\"[^\"]*\\.h\"'"; # find include directives with quotes
60my $sedcmd = "sed -E 's|^[^\"]*\"([^\"]*)\\.h\".*\$|\\1|'"; # extract quoted path
61my %folders; # build our own tree like the directories
62my $nFolders = 1;
63while( my ($shorter, $short) = each(%names) ) {
64   # find relevant files (.cpp and .h, and sometimes .mm too)
65   my $pat = "${short}.*";
66   my @files = glob $pat;
67
68   # store path information, for subgraph clustering later
69   $short = substr $short, length( $dir ) + 1;
70   my @ownComponents = split '/', $short;
71   my $last = pop @ownComponents;
72   my $folder = \%folders;
73   # this improves the graph in some ways:
74   # files that we just put directly under src should be treated as if in
75   # a separate subfolder.
76   @ownComponents = ("UNCLASSIFIED") if not @ownComponents;
77   # store paths in a hash from strings to references to hashes from strings to references to ...
78   # (ensuring a nonempty set at key "" for each node of this tree)
79   while (@ownComponents) {
80      my $component = shift @ownComponents;
81      if (not exists $$folder{ $component }) {
82         my %empty = ("",());
83         $$folder{ $component } = \%empty;
84         ++$nFolders;
85      }
86      $folder = $$folder{ $component };
87   }
88   # at the last folder, hash empty string specially, to the set of files
89   if (not exists $$folder{ "" }) {
90      my %empty = ("",());
91      $$folder{ "" } = \%empty;
92   }
93   $$folder{""}{$last} = ();
94
95   my %empty;
96   $graph{$shorter} = \%empty; # be sure leaf nodes are not omitted from hash
97   foreach (`cat @files | $grepcmd | $sedcmd`) {
98      chop;
99      my @components = split '/';
100      my $include = $components[-1];
101      # omit self-arcs and arcs to .h files external to the project
102      if (($shorter ne $include) && (exists $names{$include})) {
103         $graph{$shorter}{$include} = (), ++$arcs;
104      }
105   }
106}
107
108print STDERR "Found ", scalar( keys %graph ), " node(s) and ${arcs} arc(s)\n" if $traceLevel >= 1;
109
110# Step 3: compute an acyclic quotient graph
111
112my %quotientMap; # from node name to reference to array of node names
113
114sub SCCID {
115   # given reference to an array of names
116   # use the first name in the array as an ID
117   my $scc = shift;
118   return $$scc[0];
119}
120
121sub SCCLabel {
122   # given reference to an array of names
123   # use concatenation of names as the displayed label
124   my $scc = shift;
125   return join "\n", @$scc;
126}
127
128my %quotientGraph; # to be populated, from SCC ID to array of:
129# [ array of immediately reachable SCC IDs,
130#   array of transitively reachable SCC ids,
131#   rank number ]
132# The first member may be pruned to only those nodes reachable by a longest
133# path of length one
134
135# find strongly connected components with Tarjan's algorithm, which discovers
136# the nodes of the quotient graph in a bottom-up topologically sorted order
137my %temp; # assigns numbers to node names
138my $count = 1;
139my @stack; # names
140my $traceDepth = 0;
141$arcs = 0;
142my $prunedArcs = 0;
143my $maxRank = -1;
144my $largest = 0;
145# three utility procedures for discovery of one s.c.c.
146sub merge {
147   my ($a, $b) = @_;
148   my $na = @$a;
149   my $nb = @$b;
150   my @result;
151   while ($na && $nb) {
152      if ($$a[-$na] lt $$b[-$nb]) {
153         push @result, $$a[-($na--)];
154      }
155      elsif ($$b[-$nb] lt $$a[-$na]) {
156         push @result, $$b[-($nb--)];
157      }
158      else {
159         push @result, $$a[-($na--)]; $nb--;
160      }
161   }
162   push @result, $$a[-($na--)] while $na;
163   push @result, $$b[-($nb--)] while $nb;
164   @result;
165}
166sub diff {
167   my ($a, $b) = @_;
168   my $na = @$a;
169   my $nb = @$b;
170   my @result;
171   while ($na && $nb) {
172      if ($$a[-$na] lt $$b[-$nb]) {
173         push @result, $$a[-($na--)];
174      }
175      elsif ($$b[-$nb] lt $$a[-$na]) {
176         $nb--;
177      }
178      else {
179         $na--; $nb--;
180      }
181   }
182   push @result, $$a[-($na--)] while $na;
183   @result;
184}
185sub discoverOneComponent {
186   my ($sorted, $traceIndent) = @_; # reference to sorted array of names
187   # first populate the quotient map
188   foreach my $node (@$sorted) {
189      $quotientMap{ $node } = $sorted;
190   }
191   # now add arcs to the quotient graph
192   my $qhead = $$sorted[0]; # identifier of quotient node, agreeing with sub SCCID
193   $#{$quotientGraph{ $qhead }} = 2; # reserve results
194   my $data = $quotientGraph{ $qhead }; # reference to results
195   my $rank = -1;
196   my @reachable;
197   my %direct;
198   my @merged;
199   foreach my $node (@$sorted) {
200      my $tails = $graph{ $node };
201      foreach my $tail ( keys %$tails ) {
202         # it is guaranteed that all destination nodes are already in quotientMap,
203         # because of the bottom-up discovery sequence, so this works:
204         my $qtail = SCCID( $quotientMap{ $tail } );
205         $direct{ $qtail } = () if ( $qhead ne $qtail );
206         my $tailData = $quotientGraph{ $qtail };
207         my $tailRank = $$tailData[2];
208         $rank = $tailRank if $tailRank > $rank;
209         @reachable = merge( $$tailData[1], \@reachable );
210      }
211   }
212   ++$rank;
213   my @direct = sort ( keys %direct ); # all direct arcs
214   my @pruned = diff( \@direct, \@reachable ); # all nonredundant direct arcs
215   $prunedArcs += @pruned; # count for trace information
216   $arcs += @direct; # count for trace information
217   @reachable = merge( \@pruned, \@reachable ); # all nodes reachable (excluding self)
218   $$data[0] = $pruning ? \@pruned : \@direct;
219   $$data[1] = \@reachable;
220   $$data[2] = $rank;
221   if ($traceLevel >= 3) {
222      print STDERR "${traceIndent}${qhead}";
223      print STDERR " and ", (scalar(@$sorted) - 1), " other(s)" if scalar(@$sorted) > 1;
224      print STDERR " discovered at rank ${rank}\n";
225   }
226   $maxRank = $rank if $rank > $maxRank;
227   $largest = @$sorted if @$sorted > $largest;
228}
229#recursive procedure
230sub tarjan {
231   my ($name, $num) = @_;
232   my $traceIndent = " " x $traceDepth;
233   if ( exists( $temp{$name} ) ) {
234      # have visited
235      my $number = $temp{$name};
236      if ($number > 0) {
237         #scc not fully known
238         print STDERR "${traceIndent}${name} ${number} revisited\n" if $traceLevel >= 3;
239         return $number;
240      }
241      else {
242         #scc known
243         return $num; # unchanged
244      }
245   }
246   else {
247      # first visit
248      push @stack, $name;
249      $temp{$name} = my $number = $count++;
250      print STDERR "${traceIndent}${name} ${number} discovering\n" if $traceLevel >= 3;
251
252      # recur on directly reachable nodes
253      my $least = $number;
254      my $tails = $graph{$name};
255      ++$traceDepth;
256      foreach my $name2 ( keys %$tails ) {
257         my $result = tarjan( $name2, $number );
258         $least = $result if $result < $least;
259      }
260      --$traceDepth;
261
262      if ($least == $number) {
263         # finished a component (this was the first discovered node in it)
264         my $node;
265         my @scc;
266         do {
267           $node = pop @stack;
268           $temp{ $node } = 0;
269           push @scc, $node;
270         } while( $node ne $name );
271         my @sorted = sort @scc;
272         discoverOneComponent( \@sorted, $traceIndent );
273         return $num; # unchanged
274      }
275      else {
276         # not finished
277         print STDERR "${traceIndent}${name} deferred to ${least}\n" if $traceLevel >= 3;
278         return $least;
279      }
280   }
281}
282# top invocation of recursive procedure discovers all
283foreach my $node ( keys %graph ) {
284   tarjan( $node, 0 );
285}
286#give trace information
287if ($traceLevel >= 1) {
288   print STDERR "Found ", scalar(keys(%quotientGraph)), " strongly connected component(s) in ", (1 + $maxRank), " rank(s)\n";
289   print STDERR "Largest component size is ${largest}\n";
290   print STDERR "${arcs} arc(s) found (${prunedArcs} after pruning)\n";
291}
292
293# Step 4: output the graph in dot language
294print STDERR "Generating .dot file\n" if $traceLevel >= 1;
295
296# temporary redirection
297*OLD_STDOUT = *STDOUT;
298my $fname = "graph.dot";
299open my $fh, ">", $fname or die "Can't open file";
300*STDOUT = $fh;
301
302# header
303my $graphAttr =
304   # $clustering ?
305   "labeljust=l labelloc=b"
306   # : ""
307   ;
308print "strict digraph{ graph [";
309print $graphAttr;
310print " newrank=true";
311#print " mclimit=0.01";
312#print " nslimit=1";
313#print " rank=max";
314#print " rankdir=LR";
315print "]\n";
316print "node [style=filled]";
317
318# nodes and their clusters
319# group the nodes into subgraphs corresponding to directories
320print "\n";
321print "// Nodes\n";
322
323my $hue = 0;
324my $saturation = 1.0;
325my $huestep = 1.0 / $nFolders;
326sub subgraph{
327   my ($foldername, $hashref) = @_;
328   my $clustered = clustering( $foldername );
329   my $cluster = $clustered ? "cluster" : "";
330   my $clusterAttr = $clustered ? "style=bold color=\"blue\"" : "";
331   print STDERR "subgraph \"$foldername\"\n" if $traceLevel >= 3;
332   my $color = "${hue},${saturation},1.0";
333   $hue += $huestep;
334   $saturation = 1.5 - $saturation; # alternate bold and pale
335   my $attrs = $clusterAttr . "label=\"$foldername\"";
336   print "\nsubgraph \"${cluster}${foldername}\" { $attrs node [fillcolor=\"${color}\"]\n";
337   # describe the nodes at this level, stored as a set (i.e. a hash to
338   # don't-care values) at key ""
339   foreach my $name (sort (keys %{$$hashref{""}})) {
340      next unless $name; # ignore dummy element
341      my $scc = $quotientMap{ $name };
342      my $id = SCCID( $scc );
343      # only want the name that is the representative of its s.c.c.
344      # equivalence class
345      next unless $name eq $id;
346      my $label = SCCLabel( $scc );
347      print "   \"${id}\" [label=\"$label\"";
348      # insert other node attributes here as key=value pairs,
349      print " URL=\"${linkroot}${foldername}/${id}.cpp\"" if $links;
350      # separated by spaces
351      print"]\n";
352   }
353   # now recur, to describe nested clusters
354   foreach my $name ( sort( keys %$hashref ) ) {
355     next unless $name; # we just did the special entry at key "" above,
356     # which is a set of leaves at this level, not a subtree
357     subgraph( "${foldername}/${name}", $$hashref{ $name } );
358   }
359   print "}\n";
360}
361subgraph( "", \%folders );
362
363# now describe the arcs
364print "\n";
365print "// Arcs\n";
366
367while( my ($head, $data) = each( %quotientGraph ) ) {
368   foreach my $tail ( @{$$data[0]} ) {
369      print "   \"$head\" -> \"$tail\" [";
370      # insert arc attributes here as key=value pairs,
371      print "penwidth=2.0";
372      # separated by spaces
373      print"]\n";
374   }
375}
376
377#footer
378print "}\n";
379
380# restore
381*STDOUT = *OLD_STDOUT;
382
383# Step 5: generate image
384print STDERR "Generating image...\n" if $traceLevel >= 1;
385my $verbosity = ($traceLevel >= 2) ? "-v" : "";
386`dot $verbosity -O -Tsvg $fname`;
387print STDERR "done\n" if $traceLevel >= 1;
388