1<?php
2
3use Elgg\BatchResult;
4
5/**
6 * A lazy-loading proxy for a result array from a fetching function
7 *
8 * A batch can be counted or iterated over via foreach, where the batch will
9 * internally fetch results several rows at a time. This allows you to efficiently
10 * work on large result sets without loading all results in memory.
11 *
12 * A batch can run operations for any function that supports an options array
13 * and supports the keys "offset", "limit", and "count". This is usually used
14 * with elgg_get_entities() and friends, elgg_get_annotations(), and
15 * elgg_get_metadata(). In fact, those functions will return results as
16 * batches by passing in "batch" as true.
17 *
18 * Unlike a real array, direct access of results is not supported.
19 *
20 * If you pass a valid PHP callback, all results will be run through that
21 * callback. You can still foreach() through the result set after.  Valid
22 * PHP callbacks can be a string, an array, or a closure.
23 * {@link http://php.net/manual/en/language.pseudo-types.php}
24 *
25 * The callback function must accept 3 arguments: an entity, the getter
26 * used, and the options used.
27 *
 * Results from the callback are stored in callbackResult. If the callback
 * returns only booleans, callbackResult will be the combined result of
 * all calls. If no entities are processed, callbackResult will be null.
31 *
 * If the callback returns anything else, callbackResult will be an indexed
 * array of whatever the callback returns.  If returning error handling
 * information, you should include enough information to determine which
 * result you're referring to.
36 *
37 * Don't combine returning bools and returning something else.
38 *
39 * Note that returning false will not stop the foreach.
40 *
 * @warning If your callback or foreach loop deletes or disables entities
 * you MUST call setIncrementOffset(false) or set that when instantiating.
 * This forces the offset to stay what it was in the $options array.
44 *
45 * @example
46 * <code>
47 * // using foreach
48 * $batch = new \ElggBatch('elgg_get_entities', array());
49 * $batch->setIncrementOffset(false);
50 *
51 * foreach ($batch as $entity) {
52 * 	   $entity->disable();
53 * }
54 *
 * // using a callback
 * $callback = function($result, $getter, $options) {
 * 	   var_dump("Looking at annotation id: $result->id");
 *     return true;
 * };
60 *
61 * $batch = new \ElggBatch('elgg_get_annotations', array('guid' => 2), $callback);
62 *
63 * // get a batch from an Elgg getter function
64 * $batch = elgg_get_entities([
65 *     'batch' => true,
66 * ]);
67 * </code>
68 *
69 * @since 1.8
70 */
class ElggBatch implements BatchResult {

	/**
	 * The rows of the chunk currently being iterated over.
	 *
	 * @var array
	 */
	private $results = [];

	/**
	 * The function used to get results.
	 *
	 * @var callable
	 */
	private $getter = null;

	/**
	 * The given $options to alter and pass to the getter.
	 *
	 * @var array
	 */
	private $options = [];

	/**
	 * The number of results to grab at a time.
	 *
	 * @var int
	 */
	private $chunkSize = 25;

	/**
	 * A callback function to pass results through.
	 *
	 * @var callable
	 */
	private $callback = null;

	/**
	 * Start after this many results.
	 *
	 * @var int
	 */
	private $offset = 0;

	/**
	 * Stop after this many results. 0 means "no limit".
	 *
	 * @var int
	 */
	private $limit = 0;

	/**
	 * Number of rows fetched from the getter so far (incomplete rows included).
	 *
	 * @var int
	 */
	private $retrievedResults = 0;

	/**
	 * The index of the current result within the current chunk
	 *
	 * @var int
	 */
	private $resultIndex = 0;

	/**
	 * The index of the current chunk
	 *
	 * @var int
	 */
	private $chunkIndex = 0;

	/**
	 * The number of results iterated through
	 *
	 * @var int
	 */
	private $processedResults = 0;

	/**
	 * Is the getter a valid callback. Null until the first fetch checks it.
	 *
	 * @var bool
	 */
	private $validGetter = null;

	/**
	 * The result of running all entities through the callback function.
	 *
	 * @var mixed
	 */
	public $callbackResult = null;

	/**
	 * If false, offset will not be incremented. This is used for callbacks/loops that delete.
	 *
	 * @var bool
	 */
	private $incrementOffset = true;

	/**
	 * Entities that could not be instantiated during a fetch
	 *
	 * NOTE(review): nothing in this class writes to this list apart from resetting it;
	 * presumably it was meant to be filled while the getter runs - confirm.
	 *
	 * @var \stdClass[]
	 */
	private $incompleteEntities = [];

	/**
	 * Total number of incomplete entities fetched
	 *
	 * @var int
	 */
	private $totalIncompletes = 0;

	/**
	 * Batches operations on any elgg_get_*() or compatible function that supports
	 * an options array.
	 *
	 * Instead of returning all objects in memory, it goes through $chunk_size
	 * objects, then requests more from the server.  This avoids OOM errors.
	 *
	 * If a callable $callback is given, a second batch with the same arguments is
	 * iterated immediately and every result is passed to the callback; the
	 * outcome is stored in $callbackResult.
	 *
	 * @param callable $getter     The function used to get objects.  Usually
	 *                             an elgg_get_*() function, but can be any valid PHP callback.
	 * @param array    $options    The options array to pass to the getter function. If limit is
	 *                             not set, _elgg_config()->default_limit is used. In most cases
	 *                             that is not what you want. A limit of 0 fetches everything.
	 * @param mixed    $callback   An optional callback function that all results will be passed
	 *                             to upon load.  The callback needs to accept $result, $getter,
	 *                             $options.
	 * @param int      $chunk_size The number of entities to pull in before requesting more.
	 *                             You have to balance this between running out of memory in PHP
	 *                             and hitting the db server too often. Values <= 0 fall back to 25.
	 * @param bool     $inc_offset Increment the offset on each fetch. This must be false for
	 *                             callbacks that delete rows. You can set this after the
	 *                             object is created with {@link \ElggBatch::setIncrementOffset()}.
	 */
	public function __construct(callable $getter, $options, $callback = null, $chunk_size = 25,
			$inc_offset = true) {

		$this->getter = $getter;
		$this->options = $options;
		$this->callback = $callback;
		$this->chunkSize = $chunk_size;
		$this->setIncrementOffset($inc_offset);

		if ($this->chunkSize <= 0) {
			$this->chunkSize = 25;
		}

		// store these so we can compare later
		$this->offset = elgg_extract('offset', $options, 0);
		$this->limit = elgg_extract('limit', $options, _elgg_config()->default_limit);

		if (!$callback || !is_callable($callback)) {
			return;
		}

		// a callback was passed: run a separate batch with the same options through it,
		// so this instance stays untouched and can still be iterated afterwards.
		$batch = new \ElggBatch($getter, $options, null, $chunk_size, $inc_offset);

		$all_results = null;

		foreach ($batch as $item) {
			$result = call_user_func($callback, $item, $getter, $options);

			// booleans (and null) are folded into a single flag, anything else is collected
			$is_flag = ($result === true || $result === false || $result === null);

			if (!isset($all_results)) {
				$all_results = $is_flag ? $result : [];
			}

			if ($is_flag && !is_array($all_results)) {
				$all_results = $result && $all_results;
			} else {
				$all_results[] = $result;
			}
		}

		$this->callbackResult = $all_results;
	}

	/**
	 * Fetches the next chunk of results
	 *
	 * Calls the getter with the stored options, overriding "limit" and "offset"
	 * for the current chunk and passing this batch as "__ElggBatch".
	 *
	 * @return bool True if at least one row is available to iterate over
	 */
	private function getNextResultsChunk() {

		// always reset results.
		$this->results = [];

		if (!isset($this->validGetter)) {
			$this->validGetter = is_callable($this->getter);
		}

		if (!$this->validGetter) {
			return false;
		}

		$limit = $this->chunkSize;

		// if someone passed limit = 0 they want everything.
		if ($this->limit != 0) {
			if ($this->retrievedResults >= $this->limit) {
				return false;
			}

			if ($this->limit < $this->chunkSize) {
				// the original limit is smaller than a chunk
				$limit = $this->limit;
			} elseif ($this->retrievedResults + $this->chunkSize > $this->limit) {
				// a full chunk would overshoot: only ask for what is left of the original limit
				$limit = $this->limit - $this->retrievedResults;
			}
		}

		if ($this->incrementOffset) {
			$offset = $this->offset + $this->retrievedResults;
		} else {
			// rows are being removed while iterating, so only skip those we could not load
			$offset = $this->offset + $this->totalIncompletes;
		}

		$options = array_merge($this->options, [
			'limit' => $limit,
			'offset' => $offset,
			'__ElggBatch' => $this,
		]);

		$this->incompleteEntities = [];
		$this->results = call_user_func($this->getter, $options);

		// valid() already treats non-array results as "nothing to iterate"; normalise them
		// here so count() below cannot fail on a getter that returned false or null.
		if (!is_array($this->results)) {
			$this->results = [];
		}

		// batch result sets tend to be large; we don't want to cache these.
		_elgg_services()->queryCache->disable();

		try {
			$num_results = count($this->results);
			$num_incomplete = count($this->incompleteEntities);

			$this->totalIncompletes += $num_incomplete;

			if (!empty($this->incompleteEntities)) {
				// pad the front of the results with nulls representing the incompletes
				array_splice($this->results, 0, 0, array_pad([], $num_incomplete, null));
				// ...and skip past them
				reset($this->results);
				for ($i = 0; $i < $num_incomplete; $i++) {
					next($this->results);
				}
			}

			if (!$this->results) {
				return false;
			}

			$this->chunkIndex++;

			// let the system know we've jumped past the nulls
			$this->resultIndex = $num_incomplete;

			$this->retrievedResults += ($num_results + $num_incomplete);

			if ($num_results == 0) {
				// This fetch was *all* incompletes! We need to fetch until we can either
				// offer at least one row to iterate over, or give up.
				return $this->getNextResultsChunk();
			}

			return true;
		} finally {
			// re-enable on every exit path, including a recursive call that bailed out
			// early and an exception thrown while fetching again
			_elgg_services()->queryCache->enable();
		}
	}

	/**
	 * Increment the offset from the original options array? Setting to
	 * false is required for callbacks that delete rows.
	 *
	 * @param bool $increment Set to false when deleting data
	 * @return void
	 */
	public function setIncrementOffset($increment = true) {
		$this->incrementOffset = (bool) $increment;
	}

	/**
	 * Set chunk size
	 *
	 * As in the constructor, a size of 0 or less falls back to 25.
	 *
	 * @param int $size Size
	 * @return void
	 */
	public function setChunkSize($size = 25) {
		if ($size <= 0) {
			$size = 25;
		}

		$this->chunkSize = $size;
	}

	/**
	 * Implements Iterator
	 */

	/**
	 * Restart the iteration from the first result.
	 *
	 * The first chunk is always fetched again: next() empties the current chunk
	 * when an iteration ends and the chunk's internal pointer is never reset, so
	 * a previously fetched chunk cannot be reused.
	 *
	 * {@inheritdoc}
	 */
	public function rewind() {
		$this->resultIndex = 0;
		$this->retrievedResults = 0;
		$this->processedResults = 0;
		$this->chunkIndex = 0;

		$this->getNextResultsChunk();
	}

	/**
	 * Returns the result the current chunk's internal pointer is on.
	 *
	 * {@inheritdoc}
	 */
	public function current() {
		return current($this->results);
	}

	/**
	 * Returns the number of results iterated through so far.
	 *
	 * {@inheritdoc}
	 */
	public function key() {
		return $this->processedResults;
	}

	/**
	 * Advance to the next result, fetching a new chunk when the current one is used up.
	 *
	 * {@inheritdoc}
	 */
	public function next() {
		// if we'll be at the end.
		if ($this->limit > 0 && ($this->processedResults + 1) >= $this->limit) {
			$this->results = [];
			return false;
		}

		// if we'll need new results.
		if (($this->resultIndex + 1) >= $this->chunkSize) {
			if (!$this->getNextResultsChunk()) {
				$this->results = [];
				return false;
			}

			$result = current($this->results);
		} else {
			// the function above resets the indexes, so only inc if not
			// getting new set
			$this->resultIndex++;
			$result = next($this->results);
		}

		$this->processedResults++;
		return $result;
	}

	/**
	 * Whether the current chunk's internal pointer is on an element.
	 *
	 * {@inheritdoc}
	 */
	public function valid() {
		if (!is_array($this->results)) {
			return false;
		}

		$key = key($this->results);

		return ($key !== null && $key !== false);
	}

	/**
	 * Count the total results available at this moment.
	 *
	 * As this performs a separate query, the count returned may not match the number of results you can
	 * fetch via iteration on a very active DB.
	 *
	 * @see Countable::count()
	 * @return int
	 * @throws RuntimeException If the getter is not callable
	 */
	public function count() {
		if (!is_callable($this->getter)) {
			$inspector = new \Elgg\Debug\Inspector();
			throw new RuntimeException("Getter is not callable: " . $inspector->describeCallable($this->getter));
		}

		// same options as for fetching, but ask the getter for a count
		$options = array_merge($this->options, ['count' => true]);

		return call_user_func($this->getter, $options);
	}
}
456