1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Prediction model representation.
19 *
20 * @package   core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Prediction model representation.
31 *
32 * @package   core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
    /**
     * All as expected.
     *
     * train() and predict() compare the result status against this value to decide whether to continue.
     */
    const OK = 0;

    /**
     * There was a problem.
     */
    const GENERAL_ERROR = 1;

    /**
     * No dataset to analyse.
     *
     * Status returned by evaluate(), train() and predict() when the analyser does not provide data to work with.
     */
    const NO_DATASET = 2;

    /**
     * Model with low prediction accuracy.
     */
    const LOW_SCORE = 4;

    /**
     * Not enough data to evaluate the model properly.
     */
    const NOT_ENOUGH_DATA = 8;

    /**
     * Invalid analysable for the time splitting method.
     *
     * NOTE(review): same numeric value as LOW_SCORE (4). Presumably the ANALYSABLE_* constants describe analysables
     * rather than model results and are never mixed with the statuses above; confirm before combining them.
     */
    const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

    /**
     * Invalid analysable for all time splitting methods.
     *
     * NOTE(review): same numeric value as NOT_ENOUGH_DATA (8), see the note on ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD.
     */
    const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

    /**
     * Invalid analysable for the target
     */
    const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

    /**
     * Minimum score to consider a non-static prediction model as good.
     */
    const MIN_SCORE = 0.7;

    /**
     * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
     */
    const PREDICTION_MIN_SCORE = 0.6;

    /**
     * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
     *
     * Passed by evaluate() to the predictions processor evaluation methods.
     */
    const ACCEPTED_DEVIATION = 0.05;

    /**
     * Number of evaluation repetitions.
     *
     * Passed by evaluate() to the predictions processor evaluation methods.
     */
    const EVALUATION_ITERATIONS = 10;
97
    /**
     * The model record, either provided to the constructor or loaded by it from the analytics_models table.
     *
     * @var \stdClass
     */
    protected $model = null;

    /**
     * The analyser instance, created by init_analyser(). Null until it is initialised.
     *
     * @var \core_analytics\local\analyser\base
     */
    protected $analyser = null;

    /**
     * The model target, cached by get_target() the first time it is requested.
     *
     * @var \core_analytics\local\target\base
     */
    protected $target = null;

    /**
     * NOTE(review): nothing in the visible part of this class assigns this property, get_predictions_processor()
     * always asks the manager for the processor; confirm whether it is still in use.
     *
     * @var \core_analytics\predictor
     */
    protected $predictionsprocessor = null;

    /**
     * The model indicators indexed by their class name as stored in the model record.
     *
     * Cached by get_indicators() and reset by update() when the model definition changes.
     *
     * @var \core_analytics\local\indicator\base[]
     */
    protected $indicators = null;

    /**
     * Cached list of contexts, update() resets it when a new list of context ids is set.
     *
     * @var \context[]
     */
    protected $contexts = null;

    /**
     * Unique Model id created from site info and last model modification.
     *
     * update() resets it when the model version changes.
     *
     * @var string
     */
    protected $uniqueid = null;
134
135    /**
136     * Constructor.
137     *
138     * @param int|\stdClass $model
139     * @return void
140     */
141    public function __construct($model) {
142        global $DB;
143
144        if (is_scalar($model)) {
145            $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
146            if (!$model) {
147                throw new \moodle_exception('errorunexistingmodel', 'analytics', '', $model);
148            }
149        }
150        $this->model = $model;
151    }
152
153    /**
154     * Quick safety check to discard site models which required components are not available anymore.
155     *
156     * @return bool
157     */
158    public function is_available() {
159        $target = $this->get_target();
160        if (!$target) {
161            return false;
162        }
163
164        $classname = $target->get_analyser_class();
165        if (!class_exists($classname)) {
166            return false;
167        }
168
169        return true;
170    }
171
172    /**
173     * Returns the model id.
174     *
175     * @return int
176     */
177    public function get_id() {
178        return $this->model->id;
179    }
180
181    /**
182     * Returns a plain \stdClass with the model data.
183     *
184     * @return \stdClass
185     */
186    public function get_model_obj() {
187        return $this->model;
188    }
189
190    /**
191     * Returns the model target.
192     *
193     * @return \core_analytics\local\target\base
194     */
195    public function get_target() {
196        if ($this->target !== null) {
197            return $this->target;
198        }
199        $instance = \core_analytics\manager::get_target($this->model->target);
200        $this->target = $instance;
201
202        return $this->target;
203    }
204
205    /**
206     * Returns the model indicators.
207     *
208     * @return \core_analytics\local\indicator\base[]
209     */
210    public function get_indicators() {
211        if ($this->indicators !== null) {
212            return $this->indicators;
213        }
214
215        $fullclassnames = json_decode($this->model->indicators);
216
217        if (!is_array($fullclassnames)) {
218            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
219        }
220
221        $this->indicators = array();
222        foreach ($fullclassnames as $fullclassname) {
223            $instance = \core_analytics\manager::get_indicator($fullclassname);
224            if ($instance) {
225                $this->indicators[$fullclassname] = $instance;
226            } else {
227                debugging('Can\'t load ' . $fullclassname . ' indicator', DEBUG_DEVELOPER);
228            }
229        }
230
231        return $this->indicators;
232    }
233
234    /**
235     * Returns the list of indicators that could potentially be used by the model target.
236     *
237     * It includes the indicators that are part of the model.
238     *
239     * @return \core_analytics\local\indicator\base[]
240     */
241    public function get_potential_indicators() {
242
243        $indicators = \core_analytics\manager::get_all_indicators();
244
245        if (empty($this->analyser)) {
246            $this->init_analyser(array('notimesplitting' => true));
247        }
248
249        foreach ($indicators as $classname => $indicator) {
250            if ($this->analyser->check_indicator_requirements($indicator) !== true) {
251                unset($indicators[$classname]);
252            }
253        }
254        return $indicators;
255    }
256
257    /**
258     * Returns the model analyser (defined by the model target).
259     *
260     * @param array $options Default initialisation with no options.
261     * @return \core_analytics\local\analyser\base
262     */
263    public function get_analyser($options = array()) {
264        if ($this->analyser !== null) {
265            return $this->analyser;
266        }
267
268        $this->init_analyser($options);
269
270        return $this->analyser;
271    }
272
273    /**
274     * Initialises the model analyser.
275     *
276     * @throws \coding_exception
277     * @param array $options
278     * @return void
279     */
280    protected function init_analyser($options = array()) {
281
282        $target = $this->get_target();
283        $indicators = $this->get_indicators();
284
285        if (empty($target)) {
286            throw new \moodle_exception('errornotarget', 'analytics');
287        }
288
289        $potentialtimesplittings = $this->get_potential_timesplittings();
290
291        $timesplittings = array();
292        if (empty($options['notimesplitting'])) {
293            if (!empty($options['evaluation'])) {
294                // The evaluation process will run using all available time splitting methods unless one is specified.
295                if (!empty($options['timesplitting'])) {
296                    $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
297
298                    if (empty($potentialtimesplittings[$timesplitting->get_id()])) {
299                        throw new \moodle_exception('errorcannotusetimesplitting', 'analytics');
300                    }
301                    $timesplittings = array($timesplitting->get_id() => $timesplitting);
302                } else {
303                    $timesplittingsforevaluation = \core_analytics\manager::get_time_splitting_methods_for_evaluation();
304
305                    // They both have the same objects, using $potentialtimesplittings as its items are sorted.
306                    $timesplittings = array_intersect_key($potentialtimesplittings, $timesplittingsforevaluation);
307                }
308            } else {
309
310                if (empty($this->model->timesplitting)) {
311                    throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
312                }
313
314                // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
315                $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
316            }
317
318            if (empty($timesplittings)) {
319                throw new \moodle_exception('errornotimesplittings', 'analytics');
320            }
321        }
322
323        $classname = $target->get_analyser_class();
324        if (!class_exists($classname)) {
325            throw new \coding_exception($classname . ' class does not exists');
326        }
327
328        // Returns a \core_analytics\local\analyser\base class.
329        $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
330    }
331
332    /**
333     * Returns the model time splitting method.
334     *
335     * @return \core_analytics\local\time_splitting\base|false Returns false if no time splitting.
336     */
337    public function get_time_splitting() {
338        if (empty($this->model->timesplitting)) {
339            return false;
340        }
341        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
342    }
343
344    /**
345     * Returns the time-splitting methods that can be used by this model.
346     *
347     * @return \core_analytics\local\time_splitting\base[]
348     */
349    public function get_potential_timesplittings() {
350
351        $timesplittings = \core_analytics\manager::get_all_time_splittings();
352        uasort($timesplittings, function($a, $b) {
353            return strcasecmp($a->get_name(), $b->get_name());
354        });
355
356        foreach ($timesplittings as $key => $timesplitting) {
357            if (!$this->get_target()->can_use_timesplitting($timesplitting)) {
358                unset($timesplittings[$key]);
359                continue;
360            }
361        }
362        return $timesplittings;
363    }
364
365    /**
366     * Creates a new model. Enables it if $timesplittingid is specified.
367     *
368     * @param \core_analytics\local\target\base $target
369     * @param \core_analytics\local\indicator\base[] $indicators
370     * @param string|false $timesplittingid The time splitting method id (its fully qualified class name)
371     * @param string|null $processor The machine learning backend this model will use.
372     * @return \core_analytics\model
373     */
374    public static function create(\core_analytics\local\target\base $target, array $indicators,
375                                  $timesplittingid = false, $processor = null) {
376        global $USER, $DB;
377
378        $indicatorclasses = self::indicator_classes($indicators);
379
380        $now = time();
381
382        $modelobj = new \stdClass();
383        $modelobj->target = $target->get_id();
384        $modelobj->indicators = json_encode($indicatorclasses);
385        $modelobj->version = $now;
386        $modelobj->timecreated = $now;
387        $modelobj->timemodified = $now;
388        $modelobj->usermodified = $USER->id;
389
390        if ($target->based_on_assumptions()) {
391            $modelobj->trained = 1;
392        }
393
394        if ($timesplittingid) {
395            if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base')) {
396                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
397            }
398            if (substr($timesplittingid, 0, 1) !== '\\') {
399                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
400            }
401            $modelobj->timesplitting = $timesplittingid;
402        }
403
404        if ($processor &&
405                !manager::is_valid($processor, '\core_analytics\classifier') &&
406                !manager::is_valid($processor, '\core_analytics\regressor')) {
407            throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
408        } else {
409            $modelobj->predictionsprocessor = $processor;
410        }
411
412        $id = $DB->insert_record('analytics_models', $modelobj);
413
414        // Get db defaults.
415        $modelobj = $DB->get_record('analytics_models', array('id' => $id), '*', MUST_EXIST);
416
417        $model = new static($modelobj);
418
419        return $model;
420    }
421
422    /**
423     * Does this model exist?
424     *
425     * If no indicators are provided it considers any model with the provided
426     * target a match.
427     *
428     * @param \core_analytics\local\target\base $target
429     * @param \core_analytics\local\indicator\base[]|false $indicators
430     * @return bool
431     */
432    public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
433        global $DB;
434
435        $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));
436
437        if (!$existingmodels) {
438            return false;
439        }
440
441        if (!$indicators && $existingmodels) {
442            return true;
443        }
444
445        $indicatorids = array_keys($indicators);
446        sort($indicatorids);
447
448        foreach ($existingmodels as $modelobj) {
449            $model = new \core_analytics\model($modelobj);
450            $modelindicatorids = array_keys($model->get_indicators());
451            sort($modelindicatorids);
452
453            if ($indicatorids === $modelindicatorids) {
454                return true;
455            }
456        }
457        return false;
458    }
459
460    /**
461     * Updates the model.
462     *
463     * @param int|bool $enabled
464     * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
465     * @param string|false $timesplittingid False to respect current time splitting method
466     * @param string|false $predictionsprocessor False to respect current predictors processor value
467     * @param int[]|false $contextids List of context ids for this model. False to respect the current list of contexts.
468     * @return void
469     */
470    public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false,
471            $contextids = false) {
472        global $USER, $DB;
473
474        \core_analytics\manager::check_can_manage_models();
475
476        $now = time();
477
478        if ($indicators !== false) {
479            $indicatorclasses = self::indicator_classes($indicators);
480            $indicatorsstr = json_encode($indicatorclasses);
481        } else {
482            // Respect current value.
483            $indicatorsstr = $this->model->indicators;
484        }
485
486        if ($timesplittingid === false) {
487            // Respect current value.
488            $timesplittingid = $this->model->timesplitting;
489        }
490
491        if ($predictionsprocessor === false) {
492            // Respect current value.
493            $predictionsprocessor = $this->model->predictionsprocessor;
494        }
495
496        if ($contextids === false) {
497            $contextsstr = $this->model->contextids;
498        } else if (!$contextids) {
499            $contextsstr = null;
500        } else {
501            $contextsstr = json_encode($contextids);
502
503            // Reset the internal cache.
504            $this->contexts = null;
505        }
506
507        if ($this->model->timesplitting !== $timesplittingid ||
508                $this->model->indicators !== $indicatorsstr ||
509                $this->model->predictionsprocessor !== $predictionsprocessor) {
510
511            // Delete generated predictions before changing the model version.
512            $this->clear();
513
514            // It needs to be reset as the version changes.
515            $this->uniqueid = null;
516            $this->indicators = null;
517
518            // We update the version of the model so different time splittings are not mixed up.
519            $this->model->version = $now;
520
521            // Reset trained flag.
522            if (!$this->is_static()) {
523                $this->model->trained = 0;
524            }
525
526        } else if ($this->model->enabled != $enabled) {
527            // We purge the cached contexts with insights as some will not be visible anymore.
528            $this->purge_insights_cache();
529        }
530
531        $this->model->enabled = intval($enabled);
532        $this->model->indicators = $indicatorsstr;
533        $this->model->timesplitting = $timesplittingid;
534        $this->model->predictionsprocessor = $predictionsprocessor;
535        $this->model->contextids = $contextsstr;
536        $this->model->timemodified = $now;
537        $this->model->usermodified = $USER->id;
538
539        $DB->update_record('analytics_models', $this->model);
540    }
541
542    /**
543     * Removes the model.
544     *
545     * @return void
546     */
547    public function delete() {
548        global $DB;
549
550        \core_analytics\manager::check_can_manage_models();
551
552        $this->clear();
553
554        // Method self::clear is already clearing the current model version.
555        $predictor = $this->get_predictions_processor(false);
556        if ($predictor->is_ready() !== true) {
557            $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
558            debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
559                $this->model->id . ' could not be deleted.');
560        } else {
561            $predictor->delete_output_dir($this->get_output_dir(array(), true), $this->get_unique_id());
562        }
563
564        $DB->delete_records('analytics_models', array('id' => $this->model->id));
565        $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id));
566    }
567
    /**
     * Evaluates the model.
     *
     * This method gets the site contents (through the analyser) creates a .csv dataset
     * with them and evaluates the model prediction accuracy multiple times using the
     * machine learning backend. It returns an object where the model score is the average
     * prediction accuracy of all executed evaluations.
     *
     * Options read by this method:
     * - 'mode': 'configuration' (default) or 'trainedmodel'. The latter evaluates the trained model, using the
     *   time splitting method set in the model and the model 'execution' output directory.
     * - 'evaluation': Always overwritten with true.
     * The whole $options array is passed to the analyser.
     *
     * @throws \moodle_exception If the mode is unknown or the model has no indicators.
     * @param array $options
     * @return \stdClass[] One result (status, info, score and logid) per evaluated time splitting method, indexed by
     *                     the time splitting method id. If there is nothing to evaluate a single result with
     *                     NO_DATASET status is returned in a numerically indexed array.
     */
    public function evaluate($options = array()) {

        \core_analytics\manager::check_can_manage_models();

        // Static models have nothing to evaluate, we only leave a note in the analyser log.
        if ($this->is_static()) {
            $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
            $result = new \stdClass();
            $result->status = self::NO_DATASET;
            return array($result);
        }

        $options['evaluation'] = true;

        if (empty($options['mode'])) {
            $options['mode'] = 'configuration';
        }

        switch ($options['mode']) {
            case 'trainedmodel':

                // We are only interested on the time splitting method used by the trained model.
                $options['timesplitting'] = $this->model->timesplitting;

                // Provide the trained model directory to the ML backend if that is what we want to evaluate.
                $trainedmodeldir = $this->get_output_dir(['execution']);
                break;
            case 'configuration':

                $trainedmodeldir = false;
                break;

            default:
                throw new \moodle_exception('errorunknownaction', 'analytics');
        }

        // Always (re)initialised here so the get_analyser() calls below return an analyser set up for evaluation.
        $this->init_analyser($options);

        if (empty($this->get_indicators())) {
            throw new \moodle_exception('errornoindicators', 'analytics');
        }

        $this->heavy_duty_mode();

        // Before get_labelled_data call so we get an early exception if it is not ready.
        $predictor = $this->get_predictions_processor();

        $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());

        // No datasets generated.
        if (empty($datasets)) {
            $result = new \stdClass();
            $result->status = self::NO_DATASET;
            $result->info = $this->get_analyser()->get_logs();
            return array($result);
        }

        // Progress information, only when running from the command line outside of unit tests.
        if (!PHPUNIT_TEST && CLI_SCRIPT) {
            echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
        }

        $results = array();
        foreach ($datasets as $timesplittingid => $dataset) {

            $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

            $result = new \stdClass();

            // Despite the variable name backslashes are just removed from the class name, not replaced by dashes.
            $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
            $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

            // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
            if ($this->get_target()->is_linear()) {
                $predictorresult = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                    self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
            } else {
                $predictorresult = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                    self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
            }

            $result->status = $predictorresult->status;
            $result->info = $predictorresult->info;

            if (isset($predictorresult->score)) {
                $result->score = $predictorresult->score;
            } else {
                // Prediction processors may return an error, default to 0 score in that case.
                $result->score = 0;
            }

            // The directory is optional in the predictor result, false is logged if it is not provided.
            $dir = false;
            if (!empty($predictorresult->dir)) {
                $dir = $predictorresult->dir;
            }

            $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info, $options['mode']);

            $results[$timesplitting->get_id()] = $result;
        }

        return $results;
    }
680
681    /**
682     * Trains the model using the site contents.
683     *
684     * This method prepares a dataset from the site contents (through the analyser)
685     * and passes it to the machine learning backends. Static models are skipped as
686     * they do not require training.
687     *
688     * @return \stdClass
689     */
690    public function train() {
691
692        \core_analytics\manager::check_can_manage_models();
693
694        if ($this->is_static()) {
695            $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
696            $result = new \stdClass();
697            $result->status = self::OK;
698            return $result;
699        }
700
701        if (!$this->is_enabled() || empty($this->model->timesplitting)) {
702            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
703        }
704
705        if (empty($this->get_indicators())) {
706            throw new \moodle_exception('errornoindicators', 'analytics');
707        }
708
709        $this->heavy_duty_mode();
710
711        // Before get_labelled_data call so we get an early exception if it is not writable.
712        $outputdir = $this->get_output_dir(array('execution'));
713
714        // Before get_labelled_data call so we get an early exception if it is not ready.
715        $predictor = $this->get_predictions_processor();
716
717        $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());
718
719        // No training if no files have been provided.
720        if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
721
722            $result = new \stdClass();
723            $result->status = self::NO_DATASET;
724            $result->info = $this->get_analyser()->get_logs();
725            return $result;
726        }
727        $samplesfile = $datasets[$this->model->timesplitting];
728
729        // Train using the dataset.
730        if ($this->get_target()->is_linear()) {
731            $predictorresult = $predictor->train_regression($this->get_unique_id(), $samplesfile, $outputdir);
732        } else {
733            $predictorresult = $predictor->train_classification($this->get_unique_id(), $samplesfile, $outputdir);
734        }
735
736        $result = new \stdClass();
737        $result->status = $predictorresult->status;
738        $result->info = $predictorresult->info;
739
740        if ($result->status !== self::OK) {
741            return $result;
742        }
743
744        $this->flag_file_as_used($samplesfile, 'trained');
745
746        // Mark the model as trained if it wasn't.
747        if ($this->model->trained == false) {
748            $this->mark_as_trained();
749        }
750
751        return $result;
752    }
753
    /**
     * Get predictions from the site contents.
     *
     * It analyses the site contents (through analyser classes) looking for samples
     * ready to receive predictions. It generates a dataset with all samples ready to
     * get predictions and it passes it to the machine learning backends or to the
     * targets based on assumptions to get the predictions.
     *
     * If predictions are obtained the prediction callbacks are executed and, if the model uses insights,
     * the insights are triggered for the affected contexts.
     *
     * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no indicators or if
     *                           the generated samples file was already used to get predictions.
     * @return \stdClass Object with status and info attributes, plus a predictions attribute unless there was
     *                   no data to analyse (NO_DATASET status).
     */
    public function predict() {
        global $DB;

        \core_analytics\manager::check_can_manage_models();

        if (!$this->is_enabled() || empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        if (empty($this->get_indicators())) {
            throw new \moodle_exception('errornoindicators', 'analytics');
        }

        $this->heavy_duty_mode();

        // Before get_unlabelled_data call so we get an early exception if it is not writable.
        // Note that static models request it as well although only the machine learning branch uses it.
        $outputdir = $this->get_output_dir(array('execution'));

        if (!$this->is_static()) {
            // Predictions using a machine learning backend.

            // Before get_unlabelled_data call so we get an early exception if it is not ready.
            $predictor = $this->get_predictions_processor();

            $samplesdata = $this->get_analyser()->get_unlabelled_data($this->get_contexts());

            // Get the prediction samples file.
            if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {

                $result = new \stdClass();
                $result->status = self::NO_DATASET;
                $result->info = $this->get_analyser()->get_logs();
                return $result;
            }
            $samplesfile = $samplesdata[$this->model->timesplitting];

            // We need to throw an exception if we are trying to predict stuff that was already predicted.
            // The record itself is not used, we only care about its existence.
            $params = array('modelid' => $this->model->id, 'action' => 'predicted', 'fileid' => $samplesfile->get_id());
            if ($predicted = $DB->get_record('analytics_used_files', $params)) {
                throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
            }

            // Read here and passed later on to the prediction callbacks.
            $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

            // Estimation and classification processes run on the machine learning backend side.
            if ($this->get_target()->is_linear()) {
                $predictorresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
            } else {
                $predictorresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
            }

            // Prepare the results object.
            $result = new \stdClass();
            $result->status = $predictorresult->status;
            $result->info = $predictorresult->info;
            $result->predictions = $this->format_predictor_predictions($predictorresult);

        } else {
            // Predictions based on assumptions.

            $indicatorcalculations = $this->get_analyser()->get_static_data($this->get_contexts());
            // Get the prediction samples file.
            if (empty($indicatorcalculations) || empty($indicatorcalculations[$this->model->timesplitting])) {

                $result = new \stdClass();
                $result->status = self::NO_DATASET;
                $result->info = $this->get_analyser()->get_logs();
                return $result;
            }

            // Same as reset($indicatorcalculations) as models based on assumptions only analyse 1 single
            // time-splitting method.
            $indicatorcalculations = $indicatorcalculations[$this->model->timesplitting];

            // Prepare the results object.
            $result = new \stdClass();
            $result->status = self::OK;
            $result->info = [];
            $result->predictions = $this->get_static_predictions($indicatorcalculations);
        }

        // Nothing else to do if the predictions processor reported a problem.
        if ($result->status !== self::OK) {
            return $result;
        }

        if ($result->predictions) {
            list($samplecontexts, $predictionrecords) = $this->execute_prediction_callbacks($result->predictions,
                $indicatorcalculations);
        }

        // $samplecontexts is only defined if there were predictions, !empty() copes with it being undefined.
        if (!empty($samplecontexts) && $this->uses_insights()) {
            $this->trigger_insights($samplecontexts, $predictionrecords);
        }

        // Only machine learning based predictions work with a samples file.
        if (!$this->is_static()) {
            $this->flag_file_as_used($samplesfile, 'predicted');
        }

        return $result;
    }
864
865    /**
866     * Returns the model predictions processor.
867     *
868     * @param bool $checkisready
869     * @return \core_analytics\predictor
870     */
871    public function get_predictions_processor($checkisready = true) {
872        return manager::get_predictions_processor($this->model->predictionsprocessor, $checkisready);
873    }
874
875    /**
876     * Formats the predictor results.
877     *
878     * @param array $predictorresult
879     * @return array
880     */
881    private function format_predictor_predictions($predictorresult) {
882
883        $predictions = array();
884        if (!empty($predictorresult->predictions)) {
885            foreach ($predictorresult->predictions as $sampleinfo) {
886
887                // We parse each prediction.
888                switch (count($sampleinfo)) {
889                    case 1:
890                        // For whatever reason the predictions processor could not process this sample, we
891                        // skip it and do nothing with it.
892                        debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
893                            $sampleinfo[0], DEBUG_DEVELOPER);
894                        continue 2;
895                    case 2:
896                        // Prediction processors that do not return a prediction score will have the maximum prediction
897                        // score.
898                        list($uniquesampleid, $prediction) = $sampleinfo;
899                        $predictionscore = 1;
900                        break;
901                    case 3:
902                        list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
903                        break;
904                    default:
905                        break;
906                }
907                $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
908                $predictions[$uniquesampleid] = $predictiondata;
909            }
910        }
911        return $predictions;
912    }
913
914    /**
915     * Execute the prediction callbacks defined by the target.
916     *
917     * @param \stdClass[] $predictions
918     * @param array $indicatorcalculations
919     * @return array
920     */
921    protected function execute_prediction_callbacks(&$predictions, $indicatorcalculations) {
922
923        // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
924        $samplecontexts = array();
925        $records = array();
926
927        foreach ($predictions as $uniquesampleid => $prediction) {
928
929            // The unique sample id contains both the sampleid and the rangeindex.
930            list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
931            if ($this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
932
933                // Prepare the record to store the predicted values.
934                list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
935                    $prediction->predictionscore, json_encode($indicatorcalculations[$uniquesampleid]));
936
937                // We will later bulk-insert them all.
938                $records[$uniquesampleid] = $record;
939
940                // Also store all samples context to later generate insights or whatever action the target wants to perform.
941                $samplecontexts[$samplecontext->id] = $samplecontext;
942
943                $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
944                    $prediction->prediction, $prediction->predictionscore);
945            }
946        }
947
948        if (!empty($records)) {
949            $this->save_predictions($records);
950        }
951
952        return [$samplecontexts, $records];
953    }
954
955    /**
956     * Generates insights and updates the cache.
957     *
958     * @param \context[] $samplecontexts
959     * @param  \stdClass[] $predictionrecords
960     * @return void
961     */
962    protected function trigger_insights($samplecontexts, $predictionrecords) {
963
964        // Notify the target that all predictions have been processed.
965        if ($this->get_analyser()::one_sample_per_analysable()) {
966
967            // We need to do something unusual here. self::save_predictions uses the bulk-insert function (insert_records()) for
968            // performance reasons and that function does not return us the inserted ids. We need to retrieve them from
969            // the database, and we need to do it using one single database query (for performance reasons as well).
970            $predictionrecords = $this->add_prediction_ids($predictionrecords);
971
972            $samplesdata = $this->predictions_sample_data($predictionrecords);
973            $samplesdata = $this->append_calculations_info($predictionrecords, $samplesdata);
974
975            $predictions = array_map(function($predictionobj) use ($samplesdata) {
976                $prediction = new \core_analytics\prediction($predictionobj, $samplesdata[$predictionobj->sampleid]);
977                return $prediction;
978            }, $predictionrecords);
979        } else {
980            $predictions = [];
981        }
982
983        $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts, $predictions);
984
985        if ($this->get_target()->link_insights_report()) {
986
987            // Update cache.
988            foreach ($samplecontexts as $context) {
989                \core_analytics\manager::cached_models_with_insights($context, $this->get_id());
990            }
991        }
992    }
993
    /**
     * Get predictions from a static model.
     *
     * The indicator calculations are grouped by analysable class and range index, and the target is asked to
     * calculate the value of each group of samples. Calculated values are returned as predictions with the
     * maximum prediction score.
     *
     * Note that $indicatorcalculations is received by reference and modified in place:
     *  - The headers row (first element) is removed.
     *  - Each remaining row is replaced by an array of indicator values indexed by the header names.
     *  - Rows whose sample was not calculated by the target, or was calculated as null, are removed.
     *
     * @param array $indicatorcalculations Headers row followed by one row per sample, indexed by unique sample id.
     * @return \stdClass[] Objects with 'prediction' and 'predictionscore' properties, indexed by unique sample id.
     */
    protected function get_static_predictions(&$indicatorcalculations) {

        // The first row contains the column headers.
        $headers = array_shift($indicatorcalculations);

        // Get rid of the sampleid header.
        array_shift($headers);

        // Group samples by analysable for \core_analytics\local\target::calculate.
        $analysables = array();
        // List all sampleids together.
        $sampleids = array();

        foreach ($indicatorcalculations as $uniquesampleid => $indicators) {

            // Get rid of the sampleid column.
            unset($indicators[0]);
            // Index the indicator values by header name and write the row back to the by-reference array.
            $indicators = array_combine($headers, $indicators);
            $indicatorcalculations[$uniquesampleid] = $indicators;

            list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

            $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
            $analysableclass = get_class($analysable);
            if (empty($analysables[$analysableclass])) {
                $analysables[$analysableclass] = array();
            }
            // NOTE(review): groups are keyed by analysable class and range index only, so the analysable stored
            // here is the one of the first sample found for that combination - confirm that all the samples
            // received in one call belong to the same analysable.
            if (empty($analysables[$analysableclass][$rangeindex])) {
                $analysables[$analysableclass][$rangeindex] = (object)[
                    'analysable' => $analysable,
                    'indicatorsdata' => array(),
                    'sampleids' => array()
                ];
            }

            // Using the sampleid as a key so we can easily merge indicators data later.
            $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
            // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
            $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

            // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
            $sampleids[$sampleid] = $sampleid;
        }

        // Get all samples data.
        list($sampleids, $samplesdata) = $this->get_samples($sampleids);

        // Calculate the targets.
        $predictions = array();
        foreach ($analysables as $analysableclass => $rangedata) {
            foreach ($rangedata as $rangeindex => $data) {

                // Attach samples data and calculated indicators data.
                $this->get_target()->clear_sample_data();
                $this->get_target()->add_sample_data($samplesdata);
                $this->get_target()->add_sample_data($data->indicatorsdata);

                // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
                $timesplitting = $this->get_time_splitting();
                $timesplitting->set_modelid($this->get_id());
                $timesplitting->set_analysable($data->analysable);
                $range = $timesplitting->get_range_by_index($rangeindex);

                // The list of sample ids is handed to the target for filtering before the calculation.
                $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
                $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

                // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
                // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
                // by self::save_prediction. Only rows of the range being processed are considered; isset() is also
                // false for null values, so samples calculated as null are dropped here as well.
                $indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid)
                        use ($calculations, $rangeindex) {
                    list($sampleid, $indicatorsrangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    if ($rangeindex == $indicatorsrangeindex && !isset($calculations[$sampleid])) {
                        return false;
                    }
                    return true;
                }, ARRAY_FILTER_USE_BOTH);

                foreach ($calculations as $sampleid => $value) {

                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                    // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                    if (is_null($calculations[$sampleid])) {
                        unset($indicatorcalculations[$uniquesampleid]);
                        continue;
                    }

                    // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                    // true according to what the developer defined.
                    $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
                }
            }
        }
        return $predictions;
    }
1095
1096    /**
1097     * Stores the prediction in the database.
1098     *
1099     * @param int $sampleid
1100     * @param int $rangeindex
1101     * @param int $prediction
1102     * @param float $predictionscore
1103     * @param string $calculations
1104     * @return \context
1105     */
1106    protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
1107        $context = $this->get_analyser()->sample_access_context($sampleid);
1108
1109        $record = new \stdClass();
1110        $record->modelid = $this->model->id;
1111        $record->contextid = $context->id;
1112        $record->sampleid = $sampleid;
1113        $record->rangeindex = $rangeindex;
1114        $record->prediction = $prediction;
1115        $record->predictionscore = $predictionscore;
1116        $record->calculations = $calculations;
1117        $record->timecreated = time();
1118
1119        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
1120        $timesplitting = $this->get_time_splitting();
1121        $timesplitting->set_modelid($this->get_id());
1122        $timesplitting->set_analysable($analysable);
1123        $range = $timesplitting->get_range_by_index($rangeindex);
1124        if ($range) {
1125            $record->timestart = $range['start'];
1126            $record->timeend = $range['end'];
1127        }
1128
1129        return array($record, $context);
1130    }
1131
1132    /**
1133     * Save the prediction objects.
1134     *
1135     * @param \stdClass[] $records
1136     */
1137    protected function save_predictions($records) {
1138        global $DB;
1139        $DB->insert_records('analytics_predictions', $records);
1140    }
1141
1142    /**
1143     * Enabled the model using the provided time splitting method.
1144     *
1145     * @param string|false $timesplittingid False to respect the current time splitting method.
1146     * @return void
1147     */
1148    public function enable($timesplittingid = false) {
1149        global $DB, $USER;
1150
1151        $now = time();
1152
1153        if ($timesplittingid && $timesplittingid !== $this->model->timesplitting) {
1154
1155            if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base')) {
1156                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
1157            }
1158
1159            if (substr($timesplittingid, 0, 1) !== '\\') {
1160                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
1161            }
1162
1163            // Delete generated predictions before changing the model version.
1164            $this->clear();
1165
1166            // It needs to be reset as the version changes.
1167            $this->uniqueid = null;
1168
1169            $this->model->timesplitting = $timesplittingid;
1170            $this->model->version = $now;
1171
1172            // Reset trained flag.
1173            if (!$this->is_static()) {
1174                $this->model->trained = 0;
1175            }
1176        } else if (empty($this->model->timesplitting)) {
1177            // A valid timesplitting method needs to be supplied before a model can be enabled.
1178            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
1179
1180        }
1181
1182        // Purge pages with insights as this may change things.
1183        if ($this->model->enabled != 1) {
1184            $this->purge_insights_cache();
1185        }
1186
1187        $this->model->enabled = 1;
1188        $this->model->timemodified = $now;
1189        $this->model->usermodified = $USER->id;
1190
1191        // We don't always update timemodified intentionally as we reserve it for target, indicators or timesplitting updates.
1192        $DB->update_record('analytics_models', $this->model);
1193    }
1194
1195    /**
1196     * Is this a static model (as defined by the target)?.
1197     *
1198     * Static models are based on assumptions instead of in machine learning
1199     * backends results.
1200     *
1201     * @return bool
1202     */
1203    public function is_static() {
1204        return (bool)$this->get_target()->based_on_assumptions();
1205    }
1206
1207    /**
1208     * Is this model enabled?
1209     *
1210     * @return bool
1211     */
1212    public function is_enabled() {
1213        return (bool)$this->model->enabled;
1214    }
1215
1216    /**
1217     * Is this model already trained?
1218     *
1219     * @return bool
1220     */
1221    public function is_trained() {
1222        // Models which targets are based on assumptions do not need training.
1223        return (bool)$this->model->trained || $this->is_static();
1224    }
1225
1226    /**
1227     * Marks the model as trained
1228     *
1229     * @return void
1230     */
1231    public function mark_as_trained() {
1232        global $DB;
1233
1234        \core_analytics\manager::check_can_manage_models();
1235
1236        $this->model->trained = 1;
1237        $DB->update_record('analytics_models', $this->model);
1238    }
1239
1240    /**
1241     * Get the contexts with predictions.
1242     *
1243     * @param bool $skiphidden Skip hidden predictions
1244     * @return \stdClass[]
1245     */
1246    public function get_predictions_contexts($skiphidden = true) {
1247        global $DB, $USER;
1248
1249        $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
1250                  JOIN {context} ctx ON ctx.id = ap.contextid
1251                 WHERE ap.modelid = :modelid";
1252        $params = array('modelid' => $this->model->id);
1253
1254        if ($skiphidden) {
1255            $sql .= " AND NOT EXISTS (
1256              SELECT 1
1257                FROM {analytics_prediction_actions} apa
1258               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
1259                     (apa.actionname = :fixed OR apa.actionname = :notuseful OR
1260                     apa.actionname = :useful OR apa.actionname = :notapplicable OR
1261                     apa.actionname = :incorrectlyflagged)
1262            )";
1263            $params['userid'] = $USER->id;
1264            $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
1265            $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
1266            $params['useful'] = \core_analytics\prediction::ACTION_USEFUL;
1267            $params['notapplicable'] = \core_analytics\prediction::ACTION_NOT_APPLICABLE;
1268            $params['incorrectlyflagged'] = \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED;
1269        }
1270
1271        return $DB->get_records_sql($sql, $params);
1272    }
1273
1274    /**
1275     * Has this model generated predictions?
1276     *
1277     * We don't check analytics_predictions table because targets have the ability to
1278     * ignore some predicted values, if that is the case predictions are not even stored
1279     * in db.
1280     *
1281     * @return bool
1282     */
1283    public function any_prediction_obtained() {
1284        global $DB;
1285        return $DB->record_exists('analytics_predict_samples',
1286            array('modelid' => $this->model->id, 'timesplitting' => $this->model->timesplitting));
1287    }
1288
1289    /**
1290     * Whether this model generates insights or not (defined by the model's target).
1291     *
1292     * @return bool
1293     */
1294    public function uses_insights() {
1295        $target = $this->get_target();
1296        return $target::uses_insights();
1297    }
1298
1299    /**
1300     * Whether predictions exist for this context.
1301     *
1302     * @param \context $context
1303     * @return bool
1304     */
1305    public function predictions_exist(\context $context) {
1306        global $DB;
1307
1308        // Filters out previous predictions keeping only the last time range one.
1309        $select = "modelid = :modelid AND contextid = :contextid";
1310        $params = array('modelid' => $this->model->id, 'contextid' => $context->id);
1311        return $DB->record_exists_select('analytics_predictions', $select, $params);
1312    }
1313
1314    /**
1315     * Gets the predictions for this context.
1316     *
1317     * @param \context $context
1318     * @param bool $skiphidden Skip hidden predictions
1319     * @param int $page The page of results to fetch. False for all results.
1320     * @param int $perpage The max number of results to fetch. Ignored if $page is false.
1321     * @return array($total, \core_analytics\prediction[])
1322     */
1323    public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
1324        global $DB, $USER;
1325
1326        \core_analytics\manager::check_can_list_insights($context);
1327
1328        // Filters out previous predictions keeping only the last time range one.
1329        $sql = "SELECT ap.*
1330                  FROM {analytics_predictions} ap
1331                  JOIN (
1332                    SELECT sampleid, max(rangeindex) AS rangeindex
1333                      FROM {analytics_predictions}
1334                     WHERE modelid = :modelidsubap and contextid = :contextidsubap
1335                    GROUP BY sampleid
1336                  ) apsub
1337                  ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
1338                WHERE ap.modelid = :modelid and ap.contextid = :contextid";
1339
1340        $params = array('modelid' => $this->model->id, 'contextid' => $context->id,
1341            'modelidsubap' => $this->model->id, 'contextidsubap' => $context->id);
1342
1343        if ($skiphidden) {
1344            $sql .= " AND NOT EXISTS (
1345              SELECT 1
1346                FROM {analytics_prediction_actions} apa
1347               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
1348                     (apa.actionname = :fixed OR apa.actionname = :notuseful OR
1349                     apa.actionname = :useful OR apa.actionname = :notapplicable OR
1350                     apa.actionname = :incorrectlyflagged)
1351            )";
1352            $params['userid'] = $USER->id;
1353            $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
1354            $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
1355            $params['useful'] = \core_analytics\prediction::ACTION_USEFUL;
1356            $params['notapplicable'] = \core_analytics\prediction::ACTION_NOT_APPLICABLE;
1357            $params['incorrectlyflagged'] = \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED;
1358        }
1359
1360        $sql .= " ORDER BY ap.timecreated DESC";
1361        if (!$predictions = $DB->get_records_sql($sql, $params)) {
1362            return array();
1363        }
1364
1365        // Get predicted samples' ids.
1366        $sampleids = array_map(function($prediction) {
1367            return $prediction->sampleid;
1368        }, $predictions);
1369
1370        list($unused, $samplesdata) = $this->get_samples($sampleids);
1371
1372        $current = 0;
1373
1374        if ($page !== false) {
1375            $offset = $page * $perpage;
1376            $limit = $offset + $perpage;
1377        }
1378
1379        foreach ($predictions as $predictionid => $predictiondata) {
1380
1381            $sampleid = $predictiondata->sampleid;
1382
1383            // Filter out predictions which samples are not available anymore.
1384            if (empty($samplesdata[$sampleid])) {
1385                unset($predictions[$predictionid]);
1386                continue;
1387            }
1388
1389            // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
1390            if ($page === false || ($current >= $offset && $current < $limit)) {
1391                // Replace \stdClass object by \core_analytics\prediction objects.
1392                $prediction = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
1393                $predictions[$predictionid] = $prediction;
1394            } else {
1395                unset($predictions[$predictionid]);
1396            }
1397
1398            $current++;
1399        }
1400
1401        if (empty($predictions)) {
1402            return array();
1403        }
1404
1405        return [$current, $predictions];
1406    }
1407
1408    /**
1409     * Returns the actions executed by users on the predictions.
1410     *
1411     * @param  \context|null $context
1412     * @return \moodle_recordset
1413     */
1414    public function get_prediction_actions(?\context $context): \moodle_recordset {
1415        global $DB;
1416
1417        $sql = "SELECT apa.id, apa.predictionid, apa.userid, apa.actionname, apa.timecreated,
1418                       ap.contextid, ap.sampleid, ap.rangeindex, ap.prediction, ap.predictionscore
1419                  FROM {analytics_prediction_actions} apa
1420                  JOIN {analytics_predictions} ap ON ap.id = apa.predictionid
1421                 WHERE ap.modelid = :modelid";
1422        $params = ['modelid' => $this->model->id];
1423
1424        if ($context) {
1425            $sql .= " AND ap.contextid = :contextid";
1426            $params['contextid'] = $context->id;
1427        }
1428
1429        return $DB->get_recordset_sql($sql, $params);
1430    }
1431
1432    /**
1433     * Returns the sample data of a prediction.
1434     *
1435     * @param \stdClass $predictionobj
1436     * @return array
1437     */
1438    public function prediction_sample_data($predictionobj) {
1439
1440        list($unused, $samplesdata) = $this->get_samples(array($predictionobj->sampleid));
1441
1442        if (empty($samplesdata[$predictionobj->sampleid])) {
1443            throw new \moodle_exception('errorsamplenotavailable', 'analytics');
1444        }
1445
1446        return $samplesdata[$predictionobj->sampleid];
1447    }
1448
1449    /**
1450     * Returns the samples data of the provided predictions.
1451     *
1452     * @param \stdClass[] $predictionrecords
1453     * @return array
1454     */
1455    public function predictions_sample_data(array $predictionrecords): array {
1456
1457        $sampleids = [];
1458        foreach ($predictionrecords as $predictionobj) {
1459            $sampleids[] = $predictionobj->sampleid;
1460        }
1461        list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
1462
1463        return $samplesdata;
1464    }
1465
1466    /**
1467     * Appends the calculation info to the samples data.
1468     *
1469     * @param   \stdClass[] $predictionrecords
1470     * @param   array $samplesdata
1471     * @return  array
1472     */
1473    public function append_calculations_info(array $predictionrecords, array $samplesdata): array {
1474
1475        if ($extrainfo = calculation_info::pull_info($predictionrecords)) {
1476            foreach ($samplesdata as $sampleid => $data) {
1477                // The extra info come prefixed by extra: so we will not have overwrites here.
1478                $samplesdata[$sampleid] = $samplesdata[$sampleid] + $extrainfo[$sampleid];
1479            }
1480        }
1481        return $samplesdata;
1482    }
1483
1484    /**
1485     * Returns the description of a sample
1486     *
1487     * @param \core_analytics\prediction $prediction
1488     * @return array 2 elements: list(string, \renderable)
1489     */
1490    public function prediction_sample_description(\core_analytics\prediction $prediction) {
1491        return $this->get_analyser()->sample_description($prediction->get_prediction_data()->sampleid,
1492            $prediction->get_prediction_data()->contextid, $prediction->get_sample_data());
1493    }
1494
1495    /**
1496     * Returns the default output directory for prediction processors
1497     *
1498     * @return string
1499     */
1500    public static function default_output_dir(): string {
1501        global $CFG;
1502
1503        return $CFG->dataroot . DIRECTORY_SEPARATOR . 'models';
1504    }
1505
1506    /**
1507     * Returns the output directory for prediction processors.
1508     *
1509     * Directory structure as follows:
1510     * - Evaluation runs:
1511     *   models/$model->id/$model->version/evaluation/$model->timesplitting
1512     * - Training  & prediction runs:
1513     *   models/$model->id/$model->version/execution
1514     *
1515     * @param array $subdirs
1516     * @param bool $onlymodelid Preference over $subdirs
1517     * @return string
1518     */
1519    public function get_output_dir($subdirs = array(), $onlymodelid = false) {
1520        $subdirstr = '';
1521        foreach ($subdirs as $subdir) {
1522            $subdirstr .= DIRECTORY_SEPARATOR . $subdir;
1523        }
1524
1525        $outputdir = get_config('analytics', 'modeloutputdir');
1526        if (empty($outputdir)) {
1527            // Apply default value.
1528            $outputdir = self::default_output_dir();
1529        }
1530
1531        // Append model id.
1532        $outputdir .= DIRECTORY_SEPARATOR . $this->model->id;
1533        if (!$onlymodelid) {
1534            // Append version + subdirs.
1535            $outputdir .= DIRECTORY_SEPARATOR . $this->model->version . $subdirstr;
1536        }
1537
1538        make_writable_directory($outputdir);
1539
1540        return $outputdir;
1541    }
1542
1543    /**
1544     * Returns a unique id for this model.
1545     *
1546     * This id should be unique for this site.
1547     *
1548     * @return string
1549     */
1550    public function get_unique_id() {
1551        global $CFG;
1552
1553        if (!is_null($this->uniqueid)) {
1554            return $this->uniqueid;
1555        }
1556
1557        // Generate a unique id for this site, this model and this time splitting method, considering the last time
1558        // that the model target and indicators were updated.
1559        $ids = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
1560        $this->uniqueid = sha1(implode('$$', $ids));
1561
1562        return $this->uniqueid;
1563    }
1564
1565    /**
1566     * Exports the model data for displaying it in a template.
1567     *
1568     * @param \renderer_base $output The renderer to use for exporting
1569     * @return \stdClass
1570     */
1571    public function export(\renderer_base $output) {
1572
1573        \core_analytics\manager::check_can_manage_models();
1574
1575        $data = clone $this->model;
1576
1577        $data->modelname = format_string($this->get_name());
1578        $data->name = $this->inplace_editable_name()->export_for_template($output);
1579        $data->target = $this->get_target()->get_name();
1580        $data->targetclass = $this->get_target()->get_id();
1581
1582        if ($timesplitting = $this->get_time_splitting()) {
1583            $data->timesplitting = $timesplitting->get_name();
1584        }
1585
1586        $data->indicators = array();
1587        foreach ($this->get_indicators() as $indicator) {
1588            $data->indicators[] = $indicator->get_name();
1589        }
1590        return $data;
1591    }
1592
1593    /**
1594     * Exports the model data to a zip file.
1595     *
1596     * @param string $zipfilename
1597     * @param bool $includeweights Include the model weights if available
1598     * @return string Zip file path
1599     */
1600    public function export_model(string $zipfilename, bool $includeweights = true) : string {
1601
1602        \core_analytics\manager::check_can_manage_models();
1603
1604        $modelconfig = new model_config($this);
1605        return $modelconfig->export($zipfilename, $includeweights);
1606    }
1607
1608    /**
1609     * Imports the provided model.
1610     *
1611     * Note that this method assumes that model_config::check_dependencies has already been called.
1612     *
1613     * @throws \moodle_exception
1614     * @param  string $zipfilepath Zip file path
1615     * @return \core_analytics\model
1616     */
1617    public static function import_model(string $zipfilepath) : \core_analytics\model {
1618
1619        \core_analytics\manager::check_can_manage_models();
1620
1621        $modelconfig = new \core_analytics\model_config();
1622        return $modelconfig->import($zipfilepath);
1623    }
1624
1625    /**
1626     * Can this model be exported?
1627     *
1628     * @return bool
1629     */
1630    public function can_export_configuration() : bool {
1631
1632        if (empty($this->model->timesplitting)) {
1633            return false;
1634        }
1635        if (!$this->get_indicators()) {
1636            return false;
1637        }
1638
1639        if ($this->is_static()) {
1640            return false;
1641        }
1642
1643        return true;
1644    }
1645
1646    /**
1647     * Returns the model logs data.
1648     *
1649     * @param int $limitfrom
1650     * @param int $limitnum
1651     * @return \stdClass[]
1652     */
1653    public function get_logs($limitfrom = 0, $limitnum = 0) {
1654        global $DB;
1655
1656        \core_analytics\manager::check_can_manage_models();
1657
1658        return $DB->get_records('analytics_models_log', array('modelid' => $this->get_id()), 'timecreated DESC', '*',
1659            $limitfrom, $limitnum);
1660    }
1661
1662    /**
1663     * Merges all training data files into one and returns it.
1664     *
1665     * @return \stored_file|false
1666     */
1667    public function get_training_data() {
1668
1669        \core_analytics\manager::check_can_manage_models();
1670
1671        $timesplittingid = $this->get_time_splitting()->get_id();
1672        return \core_analytics\dataset_manager::export_training_data($this->get_id(), $timesplittingid);
1673    }
1674
1675    /**
1676     * Has the model been trained using data from this site?
1677     *
1678     * This method is useful to determine if a trained model can be evaluated as
1679     * we can not use the same data for training and for evaluation.
1680     *
1681     * @return bool
1682     */
1683    public function trained_locally() : bool {
1684        global $DB;
1685
1686        if (!$this->is_trained() || $this->is_static()) {
1687            // Early exit.
1688            return false;
1689        }
1690
1691        if ($DB->record_exists('analytics_train_samples', ['modelid' => $this->model->id])) {
1692            return true;
1693        }
1694
1695        return false;
1696    }
1697
1698    /**
1699     * Flag the provided file as used for training or prediction.
1700     *
1701     * @param \stored_file $file
1702     * @param string $action
1703     * @return void
1704     */
1705    protected function flag_file_as_used(\stored_file $file, $action) {
1706        global $DB;
1707
1708        $usedfile = new \stdClass();
1709        $usedfile->modelid = $this->model->id;
1710        $usedfile->fileid = $file->get_id();
1711        $usedfile->action = $action;
1712        $usedfile->time = time();
1713        $DB->insert_record('analytics_used_files', $usedfile);
1714    }
1715
1716    /**
1717     * Log the evaluation results in the database.
1718     *
1719     * @param string $timesplittingid
1720     * @param float $score
1721     * @param string $dir
1722     * @param array $info
1723     * @param string $evaluationmode
1724     * @return int The inserted log id
1725     */
1726    protected function log_result($timesplittingid, $score, $dir = false, $info = false, $evaluationmode = 'configuration') {
1727        global $DB, $USER;
1728
1729        $log = new \stdClass();
1730        $log->modelid = $this->get_id();
1731        $log->version = $this->model->version;
1732        $log->evaluationmode = $evaluationmode;
1733        $log->target = $this->model->target;
1734        $log->indicators = $this->model->indicators;
1735        $log->timesplitting = $timesplittingid;
1736        $log->dir = $dir;
1737        if ($info) {
1738            // Ensure it is not an associative array.
1739            $log->info = json_encode(array_values($info));
1740        }
1741        $log->score = $score;
1742        $log->timecreated = time();
1743        $log->usermodified = $USER->id;
1744
1745        return $DB->insert_record('analytics_models_log', $log);
1746    }
1747
1748    /**
1749     * Utility method to return indicator class names from a list of indicator objects
1750     *
1751     * @param \core_analytics\local\indicator\base[] $indicators
1752     * @return string[]
1753     */
1754    private static function indicator_classes($indicators) {
1755
1756        // What we want to check and store are the indicator classes not the keys.
1757        $indicatorclasses = array();
1758        foreach ($indicators as $indicator) {
1759            if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
1760                if (!is_object($indicator) && !is_scalar($indicator)) {
1761                    $indicator = strval($indicator);
1762                } else if (is_object($indicator)) {
1763                    $indicator = '\\' . get_class($indicator);
1764                }
1765                throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
1766            }
1767            $indicatorclasses[] = $indicator->get_id();
1768        }
1769
1770        return $indicatorclasses;
1771    }
1772
1773    /**
1774     * Clears the model training and prediction data.
1775     *
1776     * Executed after updating model critical elements like the time splitting method
1777     * or the indicators.
1778     *
1779     * @return void
1780     */
1781    public function clear() {
1782        global $DB, $USER;
1783
1784        \core_analytics\manager::check_can_manage_models();
1785
1786        // Delete current model version stored stuff.
1787        $predictor = $this->get_predictions_processor(false);
1788        if ($predictor->is_ready() !== true) {
1789            $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
1790            debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
1791                $this->model->id . ' could not be cleared.');
1792        } else {
1793            $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
1794        }
1795
1796        $DB->delete_records_select('analytics_prediction_actions', "predictionid IN
1797            (SELECT id FROM {analytics_predictions} WHERE modelid = :modelid)", ['modelid' => $this->get_id()]);
1798
1799        $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
1800        $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
1801        $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
1802        $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
1803        $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));
1804
1805        // Purge all generated files.
1806        \core_analytics\dataset_manager::clear_model_files($this->model->id);
1807
1808        // We don't expect people to clear models regularly and the cost of filling the cache is
1809        // 1 db read per context.
1810        $this->purge_insights_cache();
1811
1812        if (!$this->is_static()) {
1813            $this->model->trained = 0;
1814        }
1815
1816        $this->model->timemodified = time();
1817        $this->model->usermodified = $USER->id;
1818        $DB->update_record('analytics_models', $this->model);
1819    }
1820
1821    /**
1822     * Returns the name of the model.
1823     *
1824     * By default, models use their target's name as their own name. They can have their explicit name, too. In which
1825     * case, the explicit name is used instead of the default one.
1826     *
1827     * @return string|lang_string
1828     */
1829    public function get_name() {
1830
1831        if (trim($this->model->name) === '') {
1832            return $this->get_target()->get_name();
1833
1834        } else {
1835            return $this->model->name;
1836        }
1837    }
1838
1839    /**
1840     * Renames the model to the given name.
1841     *
1842     * When given an empty string, the model falls back to using the associated target's name as its name.
1843     *
1844     * @param string $name The new name for the model, empty string for using the default name.
1845     */
1846    public function rename(string $name) {
1847        global $DB, $USER;
1848
1849        $this->model->name = $name;
1850        $this->model->timemodified = time();
1851        $this->model->usermodified = $USER->id;
1852
1853        $DB->update_record('analytics_models', $this->model);
1854    }
1855
1856    /**
1857     * Returns an inplace editable element with the model's name.
1858     *
1859     * @return \core\output\inplace_editable
1860     */
1861    public function inplace_editable_name() {
1862
1863        $displayname = format_string($this->get_name());
1864
1865        return new \core\output\inplace_editable('core_analytics', 'modelname', $this->model->id,
1866            has_capability('moodle/analytics:managemodels', \context_system::instance()), $displayname, $this->model->name);
1867    }
1868
1869    /**
1870     * Returns true if the time-splitting method used by this model is invalid for this model.
1871     * @return  bool
1872     */
1873    public function invalid_timesplitting_selected(): bool {
1874        $currenttimesplitting = $this->model->timesplitting;
1875        if (empty($currenttimesplitting)) {
1876            // Not set is different from invalid. This function is used to identify invalid
1877            // time-splittings.
1878            return false;
1879        }
1880
1881        $potentialtimesplittings = $this->get_potential_timesplittings();
1882        if ($currenttimesplitting && empty($potentialtimesplittings[$currenttimesplitting])) {
1883            return true;
1884        }
1885
1886        return false;
1887    }
1888
1889    /**
1890     * Adds the id from {analytics_predictions} db table to the prediction \stdClass objects.
1891     *
1892     * @param  \stdClass[] $predictionrecords
1893     * @return \stdClass[] The prediction records including their ids in {analytics_predictions} db table.
1894     */
1895    private function add_prediction_ids($predictionrecords) {
1896        global $DB;
1897
1898        $firstprediction = reset($predictionrecords);
1899
1900        $contextids = array_map(function($predictionobj) {
1901            return $predictionobj->contextid;
1902        }, $predictionrecords);
1903
1904        // Limited to 30000 records as a middle point between the ~65000 params limit in pgsql and the size limit for mysql which
1905        // can be increased if required up to a reasonable point.
1906        $chunks = array_chunk($contextids, 30000);
1907        foreach ($chunks as $contextidschunk) {
1908            list($contextsql, $contextparams) = $DB->get_in_or_equal($contextidschunk, SQL_PARAMS_NAMED);
1909
1910            // We select the fields that will allow us to map ids to $predictionrecords. Given that we already filter by modelid
1911            // we have enough with sampleid and rangeindex. The reason is that the sampleid relation to a site is N - 1.
1912            $fields = 'id, sampleid, rangeindex';
1913
1914            // We include the contextid and the timecreated filter to reduce the number of records in $dbpredictions. We can not
1915            // add as many OR conditions as records in $predictionrecords.
1916            $sql = "SELECT $fields
1917                      FROM {analytics_predictions}
1918                     WHERE modelid = :modelid
1919                           AND contextid $contextsql
1920                           AND timecreated >= :firsttimecreated";
1921            $params = $contextparams + ['modelid' => $this->model->id, 'firsttimecreated' => $firstprediction->timecreated];
1922            $dbpredictions = $DB->get_recordset_sql($sql, $params);
1923            foreach ($dbpredictions as $id => $dbprediction) {
1924                // The append_rangeindex implementation is the same regardless of the time splitting method in use.
1925                $uniqueid = $this->get_time_splitting()->append_rangeindex($dbprediction->sampleid, $dbprediction->rangeindex);
1926                $predictionrecords[$uniqueid]->id = $dbprediction->id;
1927            }
1928        }
1929
1930        return $predictionrecords;
1931    }
1932
1933    /**
1934     * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
1935     *
1936     * @param  array  $sampleids
1937     * @return array
1938     */
1939    public function get_samples(array $sampleids): array {
1940
1941        if (empty($sampleids)) {
1942            throw new \coding_exception('No sample ids provided');
1943        }
1944
1945        $chunksize = count($sampleids);
1946
1947        // We start with just 1 chunk, if it is too large for the db we split the list of sampleids in 2 and we
1948        // try again. We repeat this process until the chunk is small enough for the db engine to process. The
1949        // >= has been added in case there are other \dml_read_exceptions unrelated to the max number of params.
1950        while (empty($done) && $chunksize >= 1) {
1951
1952            $chunks = array_chunk($sampleids, $chunksize);
1953            $allsampleids = [];
1954            $allsamplesdata = [];
1955
1956            foreach ($chunks as $index => $chunk) {
1957
1958                try {
1959                    list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
1960                } catch (\dml_read_exception $e) {
1961
1962                    // Reduce the chunksize, we use floor() so the $chunksize is always less than the previous $chunksize value.
1963                    $chunksize = floor($chunksize / 2);
1964                    break;
1965                }
1966
1967                // We can sum as these two arrays are indexed by sampleid and there are no collisions.
1968                $allsampleids = $allsampleids + $chunksampleids;
1969                $allsamplesdata = $allsamplesdata + $chunksamplesdata;
1970
1971                if ($index === count($chunks) - 1) {
1972                    // We successfully processed all the samples in all chunks, we are done.
1973                    $done = true;
1974                }
1975            }
1976        }
1977
1978        if (empty($done)) {
1979            if (!empty($e)) {
1980                // Throw the last exception we caught, the \dml_read_exception we have been catching is unrelated to the max number
1981                // of param's exception.
1982                throw new \dml_read_exception($e);
1983            } else {
1984                throw new \coding_exception('We should never reach this point, there is a bug in ' .
1985                    'core_analytics\\model::get_samples\'s code');
1986            }
1987        }
1988        return [$allsampleids, $allsamplesdata];
1989    }
1990
1991    /**
1992     * Contexts where this model should be active.
1993     *
1994     * @return \context[] Empty array if there are no context restrictions.
1995     */
1996    public function get_contexts() {
1997        if ($this->contexts !== null) {
1998            return $this->contexts;
1999        }
2000
2001        if (!$this->model->contextids) {
2002            $this->contexts = [];
2003            return $this->contexts;
2004        }
2005        $contextids = json_decode($this->model->contextids);
2006
2007        // We don't expect this list to be massive as contexts need to be selected manually using the edit model form.
2008        $this->contexts = array_map(function($contextid) {
2009            return \context::instance_by_id($contextid, IGNORE_MISSING);
2010        }, $contextids);
2011
2012        return $this->contexts;
2013    }
2014
2015    /**
2016     * Purges the insights cache.
2017     */
2018    private function purge_insights_cache() {
2019        $cache = \cache::make('core', 'contextwithinsights');
2020        $cache->purge();
2021    }
2022
2023    /**
2024     * Increases system memory and time limits.
2025     *
2026     * @return void
2027     */
2028    private function heavy_duty_mode() {
2029        if (ini_get('memory_limit') != -1) {
2030            raise_memory_limit(MEMORY_HUGE);
2031        }
2032        \core_php_time_limit::raise();
2033    }
2034}
2035