1<?php
2/**
3 * For a thorough description of consistent hashing, see
4 * http://www.spiteful.com/2008/03/17/programmers-toolbox-part-3-consistent-hashing/,
5 * and also the original paper:
6 * http://www8.org/w8-papers/2a-webserver/caching/paper2.html
7 *
8 * Copyright 2007-2017 Horde LLC (http://www.horde.org/)
9 *
10 * @todo Ideas for future enhancement:
11 *   - provide a callback when a point is moved on the circle, so that the
12 *     calling code can take an action (say, transferring data).
13 *
14 * @category Horde
15 * @package  Support
16 * @license  http://www.horde.org/licenses/bsd
17 */
18class Horde_Support_ConsistentHash
19{
20    /**
21     * Number of times to put each node into the hash circle per weight value.
22     * @var integer
23     */
24    protected $_numberOfReplicas = 100;
25
26    /**
27     * Array representing our circle
28     * @var array
29     */
30    protected $_circle = array();
31
32    /**
33     * Numeric indices into the circle by hash position
34     * @var array
35     */
36    protected $_pointMap = array();
37
38    /**
39     * Number of points on the circle
40     * @var integer
41     */
42    protected $_pointCount = 0;
43
44    /**
45     * Array of nodes.
46     * @var array
47     */
48    protected $_nodes = array();
49
50    /**
51     * Number of nodes
52     * @var integer
53     */
54    protected $_nodeCount = 0;
55
56    /**
57     * Create a new consistent hash, with initial $nodes at $numberOfReplicas
58     *
59     * @param array    $nodes             Initial array of nodes to add at $weight.
60     * @param integer  $weight            The weight for the initial node list.
61     * @param integer  $numberOfReplicas  The number of points on the circle to generate for each node.
62     */
63    public function __construct($nodes = array(), $weight = 1, $numberOfReplicas = 100)
64    {
65        $this->_numberOfReplicas = $numberOfReplicas;
66        $this->addNodes($nodes, $weight);
67    }
68
69    /**
70     * Get the primary node for $key.
71     *
72     * @param string $key  The key to look up.
73     *
74     * @param string  The primary node for $key.
75     */
76    public function get($key)
77    {
78        $nodes = $this->getNodes($key, 1);
79        if (!$nodes) {
80            throw new Exception('No nodes found');
81        }
82        return $nodes[0];
83    }
84
85    /**
86     * Get an ordered list of nodes for $key.
87     *
88     * @param string   $key    The key to look up.
89     * @param integer  $count  The number of nodes to look up.
90     *
91     * @return array  An ordered array of nodes.
92     */
93    public function getNodes($key, $count = 5)
94    {
95        // Degenerate cases
96        if ($this->_nodeCount < $count) {
97            throw new Exception('Not enough nodes (have ' . $this->_nodeCount . ', ' . $count . ' requested)');
98        }
99        if ($this->_nodeCount == 0) {
100            return array();
101        }
102
103        // Simple case
104        if ($this->_nodeCount == 1) {
105            return array($this->_nodes[0]['n']);
106        }
107
108        $hash = $this->hash(serialize($key));
109
110        // Find the first point on the circle greater than $hash by binary search.
111        $low = 0;
112        $high = $this->_pointCount - 1;
113        $index = null;
114        while (true) {
115            $mid = (int)(($low + $high) / 2);
116            if ($mid == $this->_pointCount) {
117                $index = 0;
118                break;
119            }
120
121            $midval = $this->_pointMap[$mid];
122            $midval1 = ($mid == 0) ? 0 : $this->_pointMap[$mid - 1];
123            if ($midval1 < $hash && $hash <= $midval) {
124                $index = $mid;
125                break;
126            }
127
128            if ($midval > $hash) {
129                $high = $mid - 1;
130            } else {
131                $low = $mid + 1;
132            }
133
134            if ($low > $high) {
135                $index = 0;
136                break;
137            }
138        }
139
140        $nodes = array();
141        while (count($nodes) < $count) {
142            $nodeIndex = $this->_pointMap[$index++ % $this->_pointCount];
143            $nodes[$nodeIndex] = $this->_nodes[$this->_circle[$nodeIndex]]['n'];
144        }
145        return array_values($nodes);
146    }
147
148    /**
149     * Add $node with weight $weight
150     *
151     * @param mixed $node
152     */
153    public function add($node, $weight = 1)
154    {
155        // Delegate to addNodes so that the circle is only regenerated once when
156        // adding multiple nodes.
157        $this->addNodes(array($node), $weight);
158    }
159
160    /**
161     * Add multiple nodes to the hash with the same weight.
162     *
163     * @param array    $nodes   An array of nodes.
164     * @param integer  $weight  The weight to add the nodes with.
165     */
166    public function addNodes($nodes, $weight = 1)
167    {
168        foreach ($nodes as $node) {
169            $this->_nodes[] = array('n' => $node, 'w' => $weight);
170            $this->_nodeCount++;
171
172            $nodeIndex = $this->_nodeCount - 1;
173            $nodeString = serialize($node);
174
175            $numberOfReplicas = (int)($weight * $this->_numberOfReplicas);
176            for ($i = 0; $i < $numberOfReplicas; $i++) {
177                $this->_circle[$this->hash($nodeString . $i)] = $nodeIndex;
178            }
179        }
180
181        $this->_updateCircle();
182    }
183
184    /**
185     * Remove $node from the hash.
186     *
187     * @param mixed $node
188     */
189    public function remove($node)
190    {
191        $nodeIndex = null;
192        $nodeString = serialize($node);
193
194        // Search for the node in the node list
195        foreach (array_keys($this->_nodes) as $i) {
196            if ($this->_nodes[$i]['n'] === $node) {
197                $nodeIndex = $i;
198                break;
199            }
200        }
201
202        if (is_null($nodeIndex)) {
203            throw new InvalidArgumentException('Node was not in the hash');
204        }
205
206        // Remove all points from the circle
207        $numberOfReplicas = (int)($this->_nodes[$nodeIndex]['w'] * $this->_numberOfReplicas);
208        for ($i = 0; $i < $numberOfReplicas; $i++) {
209            unset($this->_circle[$this->hash($nodeString . $i)]);
210        }
211        $this->_updateCircle();
212
213        // Unset the node from the node list
214        unset($this->_nodes[$nodeIndex]);
215        $this->_nodeCount--;
216    }
217
218    /**
219     * Expose the hash function for testing, probing, and extension.
220     *
221     * @param string $key
222     *
223     * @return string Hash value
224     */
225    public function hash($key)
226    {
227        return 'h' . substr(hash('md5', $key), 0, 8);
228    }
229
230    /**
231     * Maintain the circle and arrays of points.
232     */
233    protected function _updateCircle()
234    {
235        // Sort the circle
236        ksort($this->_circle);
237
238        // Now that the hashes are sorted, generate numeric indices into the
239        // circle.
240        $this->_pointMap = array_keys($this->_circle);
241        $this->_pointCount = count($this->_pointMap);
242    }
243
244}
245