1__docformat__ = "restructuredtext en"
2
3from mdp import numx, numx_rand, utils, graph, Node
4
5class _NGNodeData(object):
6    """Data associated to a node in a Growing Neural Gas graph."""
7    def __init__(self, pos, error=0.0, hits=0, label=None):
8        # reference vector (spatial position)
9        self.pos = pos
10        # cumulative error
11        self.cum_error = error
12        self.hits = hits
13        self.label = label
14
15class _NGEdgeData(object):
16    """Data associated to an edge in a Growing Neural Gas graph."""
17    def __init__(self, age=0):
18        self.age = age
19    def inc_age(self):
20        self.age += 1
21
22
23class GrowingNeuralGasNode(Node):
24    """Learn the topological structure of the input data by building a
25    corresponding graph approximation.
26
27    The algorithm expands on the original Neural Gas algorithm
28    (see mdp.nodes NeuralGasNode) in that the algorithm adds new nodes are
29    added to the graph as more data becomes available. Im this way,
30    if the growth rate is appropriate, one can avoid overfitting  or
31    underfitting the data.
32
33    More information about the Growing Neural Gas algorithm can be found in
34    B. Fritzke, A Growing Neural Gas Network Learns Topologies, in G. Tesauro,
35    D. S. Touretzky, and T. K. Leen (editors), Advances in Neural Information
36    Processing Systems 7, pages 625-632. MIT Press, Cambridge MA, 1995.
37
38    **Attributes and methods of interest**
39
40    - graph -- The corresponding `mdp.graph.Graph` object
41    """
42    def __init__(self, start_poss=None, eps_b=0.2, eps_n=0.006, max_age=50,
43                 lambda_=100, alpha=0.5, d=0.995, max_nodes=2147483647,
44                 input_dim=None, dtype=None):
45        """Growing Neural Gas algorithm.
46
47        :Parameters:
48
49          start_poss
50            sequence of two arrays containing the position of the
51            first two nodes in the GNG graph. If unspecified, the
52            initial nodes are chosen with a random position generated
53            from a gaussian distribution with zero mean and unit
54            variance.
55
56          eps_b
57            coefficient of movement of the nearest node to a new data
58            point. Typical values are 0 < eps_b << 1 .
59
60            Default: 0.2
61
62          eps_n
63            coefficient of movement of the neighbours of the nearest
64            node to a new data point. Typical values are
65            0 < eps_n << eps_b .
66
67            Default: 0.006
68
69          max_age
70            remove an edge after `max_age` updates. Typical values are
71            10 < max_age < lambda.
72
73            Default: 50
74
75          `lambda_`
76            insert a new node after `lambda_` steps. Typical values are O(100).
77
78            Default: 100
79
80          alpha
81            when a new node is inserted, multiply the error of the
82            nodes from which it generated by 0<alpha<1. A typical value
83            is 0.5.
84
85            Default: 0.5
86
87          d
88            each step the error of the nodes are multiplied by 0<d<1.
89            Typical values are close to 1.
90
91            Default: 0.995
92
93          max_nodes
94            maximal number of nodes in the graph.
95
96            Default: 2^31 - 1
97        """
98        self.graph = graph.Graph()
99        self.tlen = 0
100
101        #copy parameters
102        (self.eps_b, self.eps_n, self.max_age, self.lambda_, self.alpha,
103         self.d, self.max_nodes) = (eps_b, eps_n, max_age, lambda_, alpha,
104                                    d, max_nodes)
105
106        super(GrowingNeuralGasNode, self).__init__(input_dim, None, dtype)
107
108        if start_poss is not None:
109            if self.dtype is None:
110                self.dtype = start_poss[0].dtype
111            node1 = self._add_node(self._refcast(start_poss[0]))
112            node2 = self._add_node(self._refcast(start_poss[1]))
113            self._add_edge(node1, node2)
114
115    def _set_input_dim(self, n):
116        self._input_dim = n
117        self.output_dim = n
118
119    def _add_node(self, pos):
120        node = self.graph.add_node(_NGNodeData(pos))
121        return node
122
123    def _add_edge(self, from_, to_):
124        self.graph.add_edge(from_, to_, _NGEdgeData())
125
126    def _get_nearest_nodes(self, x):
127        """Return the two nodes in the graph that are nearest to x and their
128        squared distances. (Return ([node1, node2], [dist1, dist2])"""
129        # distance function
130        def _distance_from_node(node):
131            #return norm(node.data.pos-x)**2
132            tmp = node.data.pos - x
133            return utils.mult(tmp, tmp)
134        g = self.graph
135        # distances of all graph nodes from x
136        distances = numx.array(map(_distance_from_node, g.nodes))
137        ids = distances.argsort()[:2]
138        #nearest = [g.nodes[idx] for idx in ids]
139        #return nearest, distances[ids]
140        return (g.nodes[ids[0]], g.nodes[ids[1]]), distances.take(ids)
141
142    def _move_node(self, node, x, eps):
143        """Move a node by eps in the direction x."""
144        # ! make sure that eps already has the right dtype
145        node.data.pos += eps*(x - node.data.pos)
146
147    def _remove_old_edges(self, edges):
148        """Remove all edges older than the maximal age."""
149        g, max_age = self.graph, self.max_age
150        for edge in edges:
151            if edge.data.age > max_age:
152                g.remove_edge(edge)
153                if edge.head.degree() == 0:
154                    g.remove_node(edge.head)
155                if edge.tail.degree() == 0:
156                    g.remove_node(edge.tail)
157
158    def _insert_new_node(self):
159        """Insert a new node in the graph where it is more necessary (i.e.
160        where the error is the largest)."""
161        g = self.graph
162        # determine the node with the highest error
163        errors = map(lambda x: x.data.cum_error, g.nodes)
164        qnode = g.nodes[numx.argmax(errors)]
165        # determine the neighbour with the highest error
166        neighbors = qnode.neighbors()
167        errors = map(lambda x: x.data.cum_error, neighbors)
168        fnode = neighbors[numx.argmax(errors)]
169        # new node, halfway between the worst node and the worst of
170        # its neighbors
171        new_pos = 0.5*(qnode.data.pos + fnode.data.pos)
172        new_node = self._add_node(new_pos)
173        # update edges
174        edges = qnode.get_edges(neighbor=fnode)
175        g.remove_edge(edges[0])
176        self._add_edge(qnode, new_node)
177        self._add_edge(fnode, new_node)
178        # update errors
179        qnode.data.cum_error *= self.alpha
180        fnode.data.cum_error *= self.alpha
181        new_node.data.cum_error = 0.5*(qnode.data.cum_error+
182                                       fnode.data.cum_error)
183
184    def get_nodes_position(self):
185        return numx.array(map(lambda n: n.data.pos, self.graph.nodes),
186                          dtype = self.dtype)
187
188    def _train(self, input):
189        g = self.graph
190        d = self.d
191
192        if len(g.nodes)==0:
193            # if missing, generate two initial nodes at random
194            # assuming that the input data has zero mean and unit variance,
195            # choose the random position according to a gaussian distribution
196            # with zero mean and unit variance
197            normal = numx_rand.normal
198            self._add_node(self._refcast(normal(0.0, 1.0, self.input_dim)))
199            self._add_node(self._refcast(normal(0.0, 1.0, self.input_dim)))
200
201        # loop on single data points
202        for x in input:
203            self.tlen += 1
204
205            # step 2 - find the nearest nodes
206            # dists are the squared distances of x from n0, n1
207            (n0, n1), dists = self._get_nearest_nodes(x)
208
209            # step 3 - increase age of the emanating edges
210            for e in n0.get_edges():
211                e.data.inc_age()
212
213            # step 4 - update error
214            n0.data.cum_error += numx.sqrt(dists[0])
215
216            # step 5 - move nearest node and neighbours
217            self._move_node(n0, x, self.eps_b)
218            # neighbors undirected
219            neighbors = n0.neighbors()
220            for n in neighbors:
221                self._move_node(n, x, self.eps_n)
222
223            # step 6 - update n0<->n1 edge
224            if n1 in neighbors:
225                # should be one edge only
226                edges = n0.get_edges(neighbor=n1)
227                edges[0].data.age = 0
228            else:
229                self._add_edge(n0, n1)
230
231            # step 7 - remove old edges
232            self._remove_old_edges(n0.get_edges())
233
234            # step 8 - add a new node each lambda steps
235            if not self.tlen % self.lambda_ and len(g.nodes) < self.max_nodes:
236                self._insert_new_node()
237
238            # step 9 - decrease errors
239            for node in g.nodes:
240                node.data.cum_error *= d
241
242    def nearest_neighbor(self, input):
243        """Assign each point in the input data to the nearest node in
244        the graph. Return the list of the nearest node instances, and
245        the list of distances.
246        Executing this function will close the training phase if
247        necessary."""
248        super(GrowingNeuralGasNode, self).execute(input)
249
250        nodes = []
251        dists = []
252        for x in input:
253            (n0, _), dist = self._get_nearest_nodes(x)
254            nodes.append(n0)
255            dists.append(numx.sqrt(dist[0]))
256        return nodes, dists
257
258class NeuralGasNode(GrowingNeuralGasNode):
259    """Learn the topological structure of the input data by building a
260    corresponding graph approximation (original Neural Gas algorithm).
261
262    The Neural Gas algorithm was originally published in Martinetz, T. and
263    Schulten, K.: A "Neural-Gas" Network Learns Topologies. In Kohonen, T.,
264    Maekisara, K., Simula, O., and Kangas, J. (eds.), Artificial Neural
265    Networks. Elsevier, North-Holland., 1991.
266
267    **Attributes and methods of interest**
268
269    - graph -- The corresponding `mdp.graph.Graph` object
270    - max_epochs - maximum number of epochs until which to train.
271    """
272    def __init__(self, num_nodes = 10,
273                       start_poss=None,
274                       epsilon_i=0.3,               # initial epsilon
275                       epsilon_f=0.05,              # final epsilon
276                       lambda_i=30.,                # initial lambda
277                       lambda_f=0.01,               # final lambda
278                       max_age_i=20,                # initial edge lifetime
279                       max_age_f=200,               # final edge lifetime
280                       max_epochs=100,
281                       n_epochs_to_train=None,
282                       input_dim=None,
283                       dtype=None):
284        """Neural Gas algorithm.
285
286        Default parameters taken from the original publication.
287
288        :Parameters:
289
290          start_poss
291            sequence of two arrays containing the position of the
292            first two nodes in the GNG graph. In unspecified, the
293            initial nodes are chosen with a random position generated
294            from a gaussian distribution with zero mean and unit
295            variance.
296
297          num_nodes
298            number of nodes to use. Ignored if start_poss is given.
299
300          epsilon_i, epsilon_f
301            initial and final values of epsilon. Fraction of the distance
302            between the closest node and the presented data point by which the
303            node moves towards the data point in an adaptation step. Epsilon
304            decays during training by e(t) = e_i(e_f/e_i)^(t/t_max) with t
305            being the epoch.
306
307          lambda_i, lambda_f
308            initial and final values of lambda. Lambda influences how the
309            weight change of nodes in the ranking decreases with lower rank. It
310            is sometimes called the "neighborhood factor". Lambda decays during
311            training in the same manner as epsilon does.
312
313          max_age_i, max_age_f
314            Initial and final lifetime, after which an edge will be removed.
315            Lifetime is measured in terms of adaptation steps, i.e.,
316            presentations of data points. It decays during training like
317            epsilon does.
318
319          max_epochs
320            number of epochs to train. One epoch has passed when all data points
321            from the input have been presented once. The default in the original
322            publication was 40000, but since this has proven to be impractically
323            high too high for many real-world data sets, we adopted a default
324            value of 100.
325
326          n_epochs_to_train
327            number of epochs to train on each call. Useful for batch learning
328            and for visualization of the training process. Default is to
329            train once until max_epochs is reached.
330        """
331
332        self.graph = graph.Graph()
333
334        if n_epochs_to_train is None:
335            n_epochs_to_train = max_epochs
336
337        #copy parameters
338        self.num_nodes = num_nodes
339        self.start_poss = start_poss
340        self.epsilon_i = epsilon_i
341        self.epsilon_f = epsilon_f
342        self.lambda_i = lambda_i
343        self.lambda_f = lambda_f
344        self.max_age_i = max_age_i
345        self.max_age_f = max_age_f
346        self.max_epochs = max_epochs
347        self.n_epochs_to_train = n_epochs_to_train
348
349        super(GrowingNeuralGasNode, self).__init__(input_dim, None, dtype)
350
351        if start_poss is not None:
352            if self.num_nodes != len(start_poss):
353                self.num_nodes = len(start_poss)
354            if self.dtype is None:
355                self.dtype = start_poss[0].dtype
356            for node_ind in range(self.num_nodes):
357                self._add_node(self._refcast(start_poss[node_ind]))
358        self.epoch = 0
359
360
361    def _train(self, input):
362        g = self.graph
363
364        if len(g.nodes) == 0:
365            # if missing, generate num_nodes initial nodes at random
366            # assuming that the input data has zero mean and unit variance,
367            # choose the random position according to a gaussian distribution
368            # with zero mean and unit variance
369            normal = numx_rand.normal
370            for _ in range(self.num_nodes):
371                self._add_node(self._refcast(normal(0.0, 1.0, self.input_dim)))
372
373        epoch = self.epoch
374        e_i = self.epsilon_i
375        e_f = self.epsilon_f
376        l_i = self.lambda_i
377        l_f = self.lambda_f
378        T_i = float(self.max_age_i)
379        T_f = float(self.max_age_f)
380        max_epochs = float(self.max_epochs)
381        remaining_epochs = self.n_epochs_to_train
382        while remaining_epochs > 0:
383            # reset permutation of data points
384            di = numx.random.permutation(input)
385            if epoch < max_epochs:
386                denom = epoch/max_epochs
387            else:
388                denom = 1.
389            epsilon = e_i * ((e_f/e_i)**denom)
390            lmbda = l_i * ((l_f/l_i)**denom)
391            T = T_i * ((T_f/T_i)**denom)
392            epoch += 1
393            for x in di:
394                # Step 1 rank nodes according to their distance to random point
395                ranked_nodes = self._rank_nodes_by_distance(x)
396
397                # Step 2 move nodes
398                for rank,node in enumerate(ranked_nodes):
399                    #TODO: cut off at some rank when using many nodes
400                    #TODO: check speedup by vectorizing
401                    delta_w = epsilon * numx.exp(-rank / lmbda) * \
402                                    (x - node.data.pos)
403                    node.data.pos += delta_w
404
405                # Step 3 update edge weight
406                for e in g.edges:
407                    e.data.inc_age()
408
409                # Step 4 set age of edge between first two nodes to zero
410                #  or create it if it doesn't exist.
411                n0 = ranked_nodes[0]
412                n1 = ranked_nodes[1]
413                nn = n0.neighbors()
414                if n1 in nn:
415                    edges = n0.get_edges(neighbor=n1)
416                    edges[0].data.age = 0  # should only be one edge
417                else:
418                    self._add_edge(n0, n1)
419
420                # step 5 delete edges with age > max_age
421                self._remove_old_edges(max_age=T)
422            remaining_epochs -= 1
423        self.epoch = epoch
424
425
426    def _rank_nodes_by_distance(self, x):
427        """Return the nodes in the graph in a list ranked by their squared
428        distance to x. """
429
430        #TODO: Refactor together with GNGNode._get_nearest_nodes
431
432        # distance function
433        def _distance_from_node(node):
434            tmp = node.data.pos - x
435            return utils.mult(tmp, tmp) # maps to mdp.numx.dot
436
437        g = self.graph
438
439        # distances of all graph nodes from x
440        distances = numx.array(map(_distance_from_node, g.nodes))
441        ids = distances.argsort()
442        ranked_nodes = [g.nodes[id] for id in ids]
443
444        return ranked_nodes
445
446
447    def _remove_old_edges(self, max_age):
448        """Remove edges with age > max_age."""
449        g = self.graph
450        for edge in self.graph.edges:
451            if edge.data.age > max_age:
452                g.remove_edge(edge)
453