<?php

use Elgg\BatchResult;

/**
 * A lazy-loading proxy for a result array from a fetching function
 *
 * A batch can be counted or iterated over via foreach, where the batch will
 * internally fetch results several rows at a time. This allows you to efficiently
 * work on large result sets without loading all results in memory.
 *
 * A batch can run operations for any function that supports an options array
 * and supports the keys "offset", "limit", and "count". This is usually used
 * with elgg_get_entities() and friends, elgg_get_annotations(), and
 * elgg_get_metadata(). In fact, those functions will return results as
 * batches by passing in "batch" as true.
 *
 * Unlike a real array, direct access of results is not supported.
 *
 * If you pass a valid PHP callback, all results will be run through that
 * callback. You can still foreach() through the result set after. Valid
 * PHP callbacks can be a string, an array, or a closure.
 * {@link http://php.net/manual/en/language.pseudo-types.php}
 *
 * The callback function must accept 3 arguments: an entity, the getter
 * used, and the options used.
 *
 * Results from the callback are stored in callbackResult. If the callback
 * returns only booleans, callbackResult will be the combined (logical AND)
 * result of all calls. If no entities are processed, callbackResult will be null.
 *
 * If the callback returns anything else, callbackResult will be an indexed
 * array of whatever the callback returns. If returning error handling
 * information, you should include enough information to determine which
 * result you're referring to.
 *
 * Don't combine returning bools and returning something else.
 *
 * Note that returning false will not stop the foreach.
 *
 * @warning If your callback or foreach loop deletes or disables entities
 * you MUST call setIncrementOffset(false) or set that when instantiating.
 * This forces the offset to stay what it was in the $options array.
 *
 * @example
 * <code>
 * // using foreach
 * $batch = new \ElggBatch('elgg_get_entities', array());
 * $batch->setIncrementOffset(false);
 *
 * foreach ($batch as $entity) {
 * 	$entity->disable();
 * }
 *
 * // using a callback
 * $callback = function($result, $getter, $options) {
 * 	var_dump("Looking at annotation id: $result->id");
 * 	return true;
 * };
 *
 * $batch = new \ElggBatch('elgg_get_annotations', array('guid' => 2), $callback);
 *
 * // get a batch from an Elgg getter function
 * $batch = elgg_get_entities([
 * 	'batch' => true,
 * ]);
 * </code>
 *
 * @since 1.8
 */
class ElggBatch implements BatchResult {

	/**
	 * The objects to iterate over (the currently loaded chunk).
	 *
	 * @var array
	 */
	private $results = [];

	/**
	 * The function used to get results.
	 *
	 * @var callable
	 */
	private $getter = null;

	/**
	 * The given $options to alter and pass to the getter.
	 *
	 * @var array
	 */
	private $options = [];

	/**
	 * The number of results to grab at a time.
	 *
	 * @var int
	 */
	private $chunkSize = 25;

	/**
	 * A callback function to pass results through.
	 *
	 * @var callable
	 */
	private $callback = null;

	/**
	 * Start after this many results.
	 *
	 * @var int
	 */
	private $offset = 0;

	/**
	 * Stop after this many results (0 means no limit).
	 *
	 * @var int
	 */
	private $limit = 0;

	/**
	 * Number of rows fetched from the getter so far (incomplete rows included).
	 *
	 * @var int
	 */
	private $retrievedResults = 0;

	/**
	 * The index of the current result within the current chunk
	 *
	 * @var int
	 */
	private $resultIndex = 0;

	/**
	 * The index of the current chunk
	 *
	 * @var int
	 */
	private $chunkIndex = 0;

	/**
	 * The number of results iterated through
	 *
	 * @var int
	 */
	private $processedResults = 0;

	/**
	 * Is the getter a valid callback (null until first checked)
	 *
	 * @var bool
	 */
	private $validGetter = null;

	/**
	 * The result of running all entities through the callback function.
	 *
	 * @var mixed
	 */
	public $callbackResult = null;

	/**
	 * If false, offset will not be incremented. This is used for callbacks/loops that delete.
	 *
	 * @var bool
	 */
	private $incrementOffset = true;

	/**
	 * Entities that could not be instantiated during a fetch
	 *
	 * NOTE(review): nothing visible in this class adds to this list; it is only
	 * reset before each fetch. Confirm how it is expected to be populated.
	 *
	 * @var \stdClass[]
	 */
	private $incompleteEntities = [];

	/**
	 * Total number of incomplete entities fetched
	 *
	 * @var int
	 */
	private $totalIncompletes = 0;

	/**
	 * Batches operations on any elgg_get_*() or compatible function that supports
	 * an options array.
	 *
	 * Instead of returning all objects in memory, it goes through $chunk_size
	 * objects, then requests more from the server. This avoids OOM errors.
	 *
	 * @param callable $getter     The function used to get objects. Usually
	 *                             an elgg_get_*() function, but can be any valid PHP callback.
	 * @param array    $options    The options array to pass to the getter function. If limit is
	 *                             not set, the configured default limit is used. In most cases
	 *                             that is not what you want.
	 * @param mixed    $callback   An optional callback function that all results will be passed
	 *                             to upon load. The callback needs to accept $result, $getter,
	 *                             $options.
	 * @param int      $chunk_size The number of entities to pull in before requesting more.
	 *                             You have to balance this between running out of memory in PHP
	 *                             and hitting the db server too often. Values <= 0 fall back to 25.
	 * @param bool     $inc_offset Increment the offset on each fetch. This must be false for
	 *                             callbacks that delete rows. You can set this after the
	 *                             object is created with {@link \ElggBatch::setIncrementOffset()}.
	 */
	public function __construct(callable $getter, $options, $callback = null, $chunk_size = 25,
			$inc_offset = true) {

		$this->getter = $getter;
		$this->options = $options;
		$this->callback = $callback;
		$this->setChunkSize($chunk_size);
		$this->setIncrementOffset($inc_offset);

		// store these so we can compare later
		$this->offset = elgg_extract('offset', $options, 0);
		$this->limit = elgg_extract('limit', $options, _elgg_config()->default_limit);

		// if passed a callback, create a new \ElggBatch with the same options
		// and pass each to the callback.
		if ($callback && is_callable($callback)) {
			$batch = new \ElggBatch($getter, $options, null, $chunk_size, $inc_offset);

			$all_results = null;

			foreach ($batch as $result) {
				$result = call_user_func($callback, $result, $getter, $options);

				// booleans (and null) are folded into a single flag, anything else is collected
				$is_flag = is_bool($result) || $result === null;

				if (!isset($all_results)) {
					$all_results = $is_flag ? $result : [];
				}

				if ($is_flag && !is_array($all_results)) {
					$all_results = $result && $all_results;
				} else {
					$all_results[] = $result;
				}
			}

			$this->callbackResult = $all_results;
		}
	}

	/**
	 * Fetches the next chunk of results
	 *
	 * @return bool True if at least one iterable row was loaded, false otherwise
	 */
	private function getNextResultsChunk() {

		// always reset results.
		$this->results = [];

		if (!isset($this->validGetter)) {
			$this->validGetter = is_callable($this->getter);
		}

		if (!$this->validGetter) {
			return false;
		}

		$limit = $this->chunkSize;

		// if someone passed limit = 0 they want everything.
		if ($this->limit != 0) {
			if ($this->retrievedResults >= $this->limit) {
				return false;
			}

			// if original limit < chunk size, set limit to original limit
			// else if the number of results we'd fetch is greater than the original limit
			if ($this->limit < $this->chunkSize) {
				$limit = $this->limit;
			} elseif ($this->retrievedResults + $this->chunkSize > $this->limit) {
				// set the limit to the number of results remaining in the original limit
				$limit = $this->limit - $this->retrievedResults;
			}
		}

		if ($this->incrementOffset) {
			$offset = $this->offset + $this->retrievedResults;
		} else {
			// rows are expected to disappear as they are processed, so only skip
			// past the rows that could not be loaded
			$offset = $this->offset + $this->totalIncompletes;
		}

		$current_options = [
			'limit' => $limit,
			'offset' => $offset,
			'__ElggBatch' => $this,
		];

		$options = array_merge($this->options, $current_options);

		$this->incompleteEntities = [];

		// A getter may return false (or another non-array) on failure. Treat that as
		// an empty chunk, otherwise count() below warns (PHP 7.2+) or throws (PHP 8).
		$fetched = call_user_func($this->getter, $options);
		$this->results = is_array($fetched) ? $fetched : [];

		// batch result sets tend to be large; we don't want to cache these.
		// NOTE(review): the cache is disabled only after the getter has run; confirm
		// whether disable() is relied upon to discard what the getter just cached.
		_elgg_services()->queryCache->disable();

		$num_results = count($this->results);
		$num_incomplete = count($this->incompleteEntities);

		$this->totalIncompletes += $num_incomplete;

		if (!empty($this->incompleteEntities)) {
			// pad the front of the results with nulls representing the incompletes
			array_splice($this->results, 0, 0, array_pad([], $num_incomplete, null));
			// ...and skip past them
			reset($this->results);
			for ($i = 0; $i < $num_incomplete; $i++) {
				next($this->results);
			}
		}

		if ($this->results) {
			$this->chunkIndex++;

			// let the system know we've jumped past the nulls
			$this->resultIndex = $num_incomplete;

			$this->retrievedResults += ($num_results + $num_incomplete);
			if ($num_results == 0) {
				// This fetch was *all* incompletes! We need to fetch until we can either
				// offer at least one row to iterate over, or give up.
				return $this->getNextResultsChunk();
			}

			_elgg_services()->queryCache->enable();
			return true;
		}

		_elgg_services()->queryCache->enable();
		return false;
	}

	/**
	 * Increment the offset from the original options array? Setting to
	 * false is required for callbacks that delete rows.
	 *
	 * @param bool $increment Set to false when deleting data
	 * @return void
	 */
	public function setIncrementOffset($increment = true) {
		$this->incrementOffset = (bool) $increment;
	}

	/**
	 * Set chunk size
	 *
	 * A chunk size of zero or less cannot drive the iteration (next() would request
	 * a new chunk after every row), so such values fall back to the default of 25,
	 * matching what the constructor has always done.
	 *
	 * @param int $size Number of rows to fetch per request
	 * @return void
	 */
	public function setChunkSize($size = 25) {
		$size = (int) $size;

		$this->chunkSize = ($size > 0) ? $size : 25;
	}

	/**
	 * Implements Iterator
	 */

	/**
	 * {@inheritdoc}
	 */
	public function rewind() {
		$this->resultIndex = 0;
		$this->retrievedResults = 0;
		$this->processedResults = 0;
		$this->chunkIndex = 0;

		// Always start over with a fresh first chunk: after a previous iteration the
		// buffer is either emptied by next() or its internal pointer is mid-chunk, so
		// reusing it would yield nothing or begin at the wrong row.
		$this->getNextResultsChunk();
	}

	/**
	 * {@inheritdoc}
	 */
	public function current() {
		return current($this->results);
	}

	/**
	 * {@inheritdoc}
	 */
	public function key() {
		return $this->processedResults;
	}

	/**
	 * {@inheritdoc}
	 */
	public function next() {
		// if we'll be at the end.
		if (($this->processedResults + 1) >= $this->limit && $this->limit > 0) {
			$this->results = [];
			return false;
		}

		// if we'll need new results.
		if (($this->resultIndex + 1) >= $this->chunkSize) {
			if (!$this->getNextResultsChunk()) {
				$this->results = [];
				return false;
			}

			$result = current($this->results);
		} else {
			// the function above resets the indexes, so only inc if not
			// getting new set
			$this->resultIndex++;
			$result = next($this->results);
		}

		$this->processedResults++;
		return $result;
	}

	/**
	 * {@inheritdoc}
	 */
	public function valid() {
		if (!is_array($this->results)) {
			return false;
		}

		// key() yields null once the internal pointer has moved past the last row
		$key = key($this->results);
		return ($key !== null && $key !== false);
	}

	/**
	 * Count the total results available at this moment.
	 *
	 * As this performs a separate query, the count returned may not match the number of results you can
	 * fetch via iteration on a very active DB.
	 *
	 * @see Countable::count()
	 * @return int
	 * @throws RuntimeException If the getter is not callable
	 */
	public function count() {
		if (!is_callable($this->getter)) {
			$inspector = new \Elgg\Debug\Inspector();
			throw new RuntimeException("Getter is not callable: " . $inspector->describeCallable($this->getter));
		}

		$options = array_merge($this->options, ['count' => true]);

		return call_user_func($this->getter, $options);
	}
}