1 // Copyright 2004 The Trustees of Indiana University.
2 
3 // Use, modification and distribution is subject to the Boost Software
4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
5 // http://www.boost.org/LICENSE_1_0.txt)
6 
7 //  Authors: Douglas Gregor
8 //           Andrew Lumsdaine
9 
10 // This program performs betweenness centrality (BC) clustering on the
11 // actor collaboration graph available at
12 // http://www.nd.edu/~networks/resources/actor/actor.dat.gz and outputs the
13 // result of clustering in Pajek format.
14 //
15 // This program mimics the BC clustering algorithm program implemented
16 // by Shashikant Penumarthy for JUNG, so that we may compare results
17 // and timings.
18 #include <boost/graph/bc_clustering.hpp>
19 #include <boost/graph/adjacency_list.hpp>
20 #include <boost/graph/graph_traits.hpp>
21 #include <fstream>
22 #include <iostream>
23 #include <string>
24 #include <boost/tokenizer.hpp>
25 #include <boost/lexical_cast.hpp>
26 #include <map>
27 
28 using namespace boost;
29 
30 struct Actor
31 {
ActorActor32   Actor(int id = -1) : id(id) {}
33 
34   int id;
35 };
36 
37 typedef adjacency_list<vecS, vecS, undirectedS, Actor,
38                        property<edge_centrality_t, double> > ActorGraph;
39 typedef graph_traits<ActorGraph>::vertex_descriptor Vertex;
40 typedef graph_traits<ActorGraph>::edge_descriptor Edge;
41 
load_actor_graph(std::istream & in,ActorGraph & g)42 void load_actor_graph(std::istream& in, ActorGraph& g)
43 {
44   std::map<int, Vertex> actors;
45 
46   std::string line;
47   while (getline(in, line)) {
48     std::vector<Vertex> actors_in_movie;
49 
50     // Map from the actor numbers on this line to the actor vertices
51     typedef tokenizer<char_separator<char> > Tok;
52     Tok tok(line, char_separator<char>(" "));
53     for (Tok::iterator id = tok.begin(); id != tok.end(); ++id) {
54       int actor_id = lexical_cast<int>(*id);
55       std::map<int, Vertex>::iterator v = actors.find(actor_id);
56       if (v == actors.end()) {
57         Vertex new_vertex = add_vertex(Actor(actor_id), g);
58         actors[actor_id] = new_vertex;
59         actors_in_movie.push_back(new_vertex);
60       } else {
61         actors_in_movie.push_back(v->second);
62       }
63     }
64 
65     for (std::vector<Vertex>::iterator i = actors_in_movie.begin();
66          i != actors_in_movie.end(); ++i) {
67       for (std::vector<Vertex>::iterator j = i + 1;
68            j != actors_in_movie.end(); ++j) {
69         if (!edge(*i, *j, g).second) add_edge(*i, *j, g);
70       }
71     }
72   }
73 }
74 
75 template<typename Graph, typename VertexIndexMap, typename VertexNameMap>
76 std::ostream&
write_pajek_graph(std::ostream & out,const Graph & g,VertexIndexMap vertex_index,VertexNameMap vertex_name)77 write_pajek_graph(std::ostream& out, const Graph& g,
78                   VertexIndexMap vertex_index, VertexNameMap vertex_name)
79 {
80   out << "*Vertices " << num_vertices(g) << '\n';
81   typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator;
82   for (vertex_iterator v = vertices(g).first; v != vertices(g).second; ++v) {
83     out << get(vertex_index, *v)+1 << " \"" << get(vertex_name, *v) << "\"\n";
84   }
85 
86   out << "*Edges\n";
87   typedef typename graph_traits<Graph>::edge_iterator edge_iterator;
88   for (edge_iterator e = edges(g).first; e != edges(g).second; ++e) {
89     out << get(vertex_index, source(*e, g))+1 << ' '
90         << get(vertex_index, target(*e, g))+1 << " 1.0\n"; // HACK!
91   }
92   return out;
93 }
94 
95 class actor_clustering_threshold : public bc_clustering_threshold<double>
96 {
97   typedef bc_clustering_threshold<double> inherited;
98 
99  public:
actor_clustering_threshold(double threshold,const ActorGraph & g,bool normalize)100   actor_clustering_threshold(double threshold, const ActorGraph& g,
101                              bool normalize)
102     : inherited(threshold, g, normalize), iter(1) { }
103 
operator ()(double max_centrality,Edge e,const ActorGraph & g)104   bool operator()(double max_centrality, Edge e, const ActorGraph& g)
105   {
106     std::cout << "Iter: " << iter << " Max Centrality: "
107               << (max_centrality / dividend) << std::endl;
108     ++iter;
109     return inherited::operator()(max_centrality, e, g);
110   }
111 
112  private:
113   unsigned int iter;
114 };
115 
main(int argc,char * argv[])116 int main(int argc, char* argv[])
117 {
118   std::string in_file;
119   std::string out_file;
120   double threshold = -1.0;
121   bool normalize = false;
122 
123   // Parse command-line options
124   {
125     int on_arg = 1;
126     while (on_arg < argc) {
127       std::string arg(argv[on_arg]);
128       if (arg == "-in") {
129         ++on_arg; assert(on_arg < argc);
130         in_file = argv[on_arg];
131       } else if (arg == "-out") {
132         ++on_arg; assert(on_arg < argc);
133         out_file = argv[on_arg];
134       } else if (arg == "-threshold") {
135         ++on_arg; assert(on_arg < argc);
136         threshold = lexical_cast<double>(argv[on_arg]);
137       } else if (arg == "-normalize") {
138         normalize = true;
139       } else {
140         std::cerr << "Unrecognized parameter \"" << arg << "\".\n";
141         return -1;
142       }
143       ++on_arg;
144     }
145 
146     if (in_file.empty() || out_file.empty() || threshold < 0) {
147       std::cerr << "error: syntax is actor_clustering [options]\n\n"
148                 << "options are:\n"
149                 << "\t-in <infile>\tInput file\n"
150                 << "\t-out <outfile>\tOutput file\n"
151                 << "\t-threshold <value>\tA threshold value\n"
152                 << "\t-normalize\tNormalize edge centrality scores\n";
153       return -1;
154     }
155   }
156 
157   ActorGraph g;
158 
159   // Load the actor graph
160   {
161     std::cout << "Building graph." << std::endl;
162     std::ifstream in(in_file.c_str());
163     if (!in) {
164       std::cerr << "Unable to open file \"" << in_file << "\" for input.\n";
165       return -2;
166     }
167     load_actor_graph(in, g);
168   }
169 
170   // Run the algorithm
171   std::cout << "Clusting..." << std::endl;
172   betweenness_centrality_clustering(g,
173     actor_clustering_threshold(threshold, g, normalize),
174     get(edge_centrality, g));
175 
176   // Output the graph
177   {
178     std::cout << "Writing graph to file: " << out_file << std::endl;
179     std::ofstream out(out_file.c_str());
180     if (!out) {
181       std::cerr << "Unable to open file \"" << out_file << "\" for output.\n";
182       return -3;
183     }
184     write_pajek_graph(out, g, get(vertex_index, g), get(&Actor::id, g));
185   }
186   return 0;
187 }
188