1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * A scheduled task.
19 *
20 * @package    core
21 * @copyright  2013 onwards Martin Dougiamas  http://dougiamas.com
22 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24namespace core\task;
25
26use core_tag_collection, core_tag_tag, core_tag_area, stdClass;
27
/**
 * Scheduled task that runs the tag cron.
 *
 * When tags are enabled ($CFG->usetags) the task rebuilds the 'tag_correlation'
 * table from the current tag instances and then removes tag instances whose tag
 * or tagged item no longer exists.
 */
class tag_cron_task extends scheduled_task {

    /**
     * Get a descriptive name for this task (shown to admins).
     *
     * @return string
     */
    public function get_name() {
        return get_string('tasktagcron', 'admin');
    }

    /**
     * Do the job.
     * Throw exceptions on errors (the job will be retried).
     *
     * Nothing is done when $CFG->usetags is empty.
     */
    public function execute() {
        global $CFG;

        if (!empty($CFG->usetags)) {
            $this->compute_correlations();
            $this->cleanup();
        }
    }

    /**
     * Calculates and stores the correlated tags of all tags.
     *
     * The correlations are stored in the 'tag_correlation' table.
     *
     * Two tags are correlated if they appear together a lot. Ex.: Users tagged with "computers"
     * will probably also be tagged with "algorithms". Only tags that belong to the same tag
     * collection are paired with each other.
     *
     * The rationale for the 'tag_correlation' table is performance. It works as a cache
     * for a potentially heavy load query done at the 'tag_instance' table. So, the
     * 'tag_correlation' table stores redundant information derived from the 'tag_instance' table.
     *
     * Rows of 'tag_correlation' that were not inserted or updated by this run are deleted.
     *
     * @param int $mincorrelation Only pairs of tags that appear together on more than
     *      $mincorrelation items are stored as correlated.
     */
    public function compute_correlations($mincorrelation = 2) {
        global $DB;

        // This mighty one line query fetches a row from the database for every
        // individual tag correlation. We then need to process the rows collecting
        // the correlations for each tag id.
        // The fields used by this query are as follows:
        //   tagid         : This is the tag id. A row is only returned for a pair of tags
        //                   that is used together on more than $mincorrelation items.
        //   correlation   : This is the tag id that correlates to the above tagid field.
        //   ocurrences    : The number of items on which both tags are used; only used for ordering.
        //   correlationid : This is the id of the row in the tag_correlation table that
        //                   relates to the tagid field and will be NULL if there are no
        //                   existing correlations.
        $sql = 'SELECT pairs.tagid, pairs.correlation, pairs.ocurrences, co.id AS correlationid
                  FROM (
                           SELECT ta.tagid, tb.tagid AS correlation, COUNT(*) AS ocurrences
                             FROM {tag_instance} ta
                             JOIN {tag} tga ON ta.tagid = tga.id
                             JOIN {tag_instance} tb ON (ta.itemtype = tb.itemtype AND ta.component = tb.component
                                AND ta.itemid = tb.itemid AND ta.tagid <> tb.tagid)
                             JOIN {tag} tgb ON tb.tagid = tgb.id AND tgb.tagcollid = tga.tagcollid
                         GROUP BY ta.tagid, tb.tagid
                           HAVING COUNT(*) > :mincorrelation
                       ) pairs
             LEFT JOIN {tag_correlation} co ON co.tagid = pairs.tagid
              ORDER BY pairs.tagid ASC, pairs.ocurrences DESC, pairs.correlation ASC';
        $rs = $DB->get_recordset_sql($sql, array('mincorrelation' => $mincorrelation));

        // Set up an empty tag correlation object. Its tagid is null, so processing it
        // on the first iteration is a no-op.
        $tagcorrelation = new stdClass;
        $tagcorrelation->id = null;
        $tagcorrelation->tagid = null;
        $tagcorrelation->correlatedtags = array();

        // We store each correlation id in this array so we can remove any correlations
        // that no longer exist.
        $correlations = array();

        // The recordset must be closed even when saving a correlation throws, hence the finally block.
        try {
            // Iterate each row of the result set and build them into tag correlations.
            // We add all of a tag's correlations to $tagcorrelation->correlatedtags[]
            // then save the $tagcorrelation object. This relies on the rows being ordered by tagid.
            foreach ($rs as $row) {
                if ($row->tagid != $tagcorrelation->tagid) {
                    // The tag id has changed so we have all of the correlations for this tag.
                    $tagcorrelationid = $this->process_computed_correlation($tagcorrelation);
                    if ($tagcorrelationid) {
                        $correlations[] = $tagcorrelationid;
                    }
                    // Now we reset the tag correlation object so we can reuse it and set it
                    // up for the current record.
                    $tagcorrelation = new stdClass;
                    $tagcorrelation->id = $row->correlationid;
                    $tagcorrelation->tagid = $row->tagid;
                    $tagcorrelation->correlatedtags = array();
                }
                // Save the correlation on the tag correlation object.
                $tagcorrelation->correlatedtags[] = $row->correlation;
            }
            // Update the current correlation after the last record.
            $tagcorrelationid = $this->process_computed_correlation($tagcorrelation);
            if ($tagcorrelationid) {
                $correlations[] = $tagcorrelationid;
            }
        } finally {
            // Close the recordset.
            $rs->close();
        }

        // Remove any correlations that weren't just identified.
        if (empty($correlations)) {
            // There are no tag correlations.
            $DB->delete_records('tag_correlation');
        } else {
            // The last argument (false) asks for a "not in"/"not equal" condition.
            list($sql, $params) = $DB->get_in_or_equal($correlations,
                    SQL_PARAMS_NAMED, 'param0000', false);
            $DB->delete_records_select('tag_correlation', 'id '.$sql, $params);
        }
    }

    /**
     * Clean up the tag tables, making sure all tagged object still exists.
     *
     * This method is called from cron.
     *
     * This should normally not be necessary, but in case related tags are not deleted
     * when the tagged record is removed, this should be done once in a while, perhaps
     * on an occasional cron run.  On a site with lots of tags, this could become an
     * expensive function to call.
     *
     * Tag instances are collected for deletion when:
     *  - their tag record no longer exists;
     *  - they tag a user (itemtype 'user', component 'core') that is flagged as deleted;
     *  - for any other itemtype, no record with the instance's itemid exists in the
     *    table named after the itemtype.
     * The collected instances are removed with {@see bulk_delete_instances()} and then
     * core_tag_collection::cleanup_unused_tags() is called.
     */
    public function cleanup() {
        global $DB;

        // Get ids to delete from instances where the tag has been deleted. This should never happen apparently.
        $sql = "SELECT ti.id
                  FROM {tag_instance} ti
             LEFT JOIN {tag} t ON t.id = ti.tagid
                 WHERE t.id IS null";
        $tagids = $DB->get_records_sql($sql);
        $tagarray = array();
        foreach ($tagids as $tagid) {
            $tagarray[] = $tagid->id;
        }

        // Next get ids from instances that have an owner that has been deleted.
        $sql = "SELECT ti.id
                  FROM {tag_instance} ti, {user} u
                 WHERE ti.itemid = u.id
                   AND ti.itemtype = 'user'
                   AND ti.component = 'core'
                   AND u.deleted = 1";
        $tagids = $DB->get_records_sql($sql);
        foreach ($tagids as $tagid) {
            $tagarray[] = $tagid->id;
        }

        // Get the other itemtypes.
        $sql = "SELECT DISTINCT component, itemtype
                  FROM {tag_instance}
                 WHERE itemtype <> 'user' or component <> 'core'";
        $tagareas = $DB->get_recordset_sql($sql);
        // The recordset must be closed even when one of the per-area queries throws.
        try {
            foreach ($tagareas as $tagarea) {
                // The itemtype is used as the name of the table holding the tagged items.
                // NOTE(review): presumably this query throws when no such table exists - confirm.
                $sql = 'SELECT ti.id
                          FROM {tag_instance} ti
                     LEFT JOIN {' . $tagarea->itemtype . '} it ON it.id = ti.itemid
                         WHERE it.id IS null
                         AND ti.itemtype = ? AND ti.component = ?';
                $tagids = $DB->get_records_sql($sql, array($tagarea->itemtype, $tagarea->component));
                foreach ($tagids as $tagid) {
                    $tagarray[] = $tagid->id;
                }
            }
        } finally {
            $tagareas->close();
        }

        // Get instances for each of the ids to be deleted.
        if (count($tagarray) > 0) {
            list($sqlin, $params) = $DB->get_in_or_equal($tagarray);
            // The tag may already be gone (first query above), so fall back to 'deleted' for its names.
            $sql = "SELECT ti.*, COALESCE(t.name, 'deleted') AS name, COALESCE(t.rawname, 'deleted') AS rawname
                      FROM {tag_instance} ti
                 LEFT JOIN {tag} t ON t.id = ti.tagid
                     WHERE ti.id $sqlin";
            $instances = $DB->get_records_sql($sql, $params);
            $this->bulk_delete_instances($instances);
        }

        core_tag_collection::cleanup_unused_tags();
    }

    /**
     * This function processes a tag correlation and makes changes in the database as required.
     *
     * The tag correlation object needs have both a tagid property and a correlatedtags property that is an array.
     * If the object has a non-empty id the matching 'tag_correlation' record is updated, otherwise a
     * new record is inserted and its id is stored on the object.
     *
     * Note that the passed object is modified: correlatedtags is replaced by a comma-separated string.
     *
     * @param   stdClass $tagcorrelation
     * @return  int|bool The id of the tag correlation that was just processed or false.
     */
    public function process_computed_correlation(stdClass $tagcorrelation) {
        global $DB;

        // You must provide a tagid and correlatedtags must be set and be an array.
        if (empty($tagcorrelation->tagid) || !isset($tagcorrelation->correlatedtags) ||
                !is_array($tagcorrelation->correlatedtags)) {
            return false;
        }

        // The correlated tag ids are stored as a single comma-separated string.
        $tagcorrelation->correlatedtags = implode(',', $tagcorrelation->correlatedtags);
        if (!empty($tagcorrelation->id)) {
            // The tag correlation already exists so update it.
            $DB->update_record('tag_correlation', $tagcorrelation);
        } else {
            // This is a new correlation to insert.
            $tagcorrelation->id = $DB->insert_record('tag_correlation', $tagcorrelation);
        }
        return $tagcorrelation->id;
    }

    /**
     * This function will delete numerous tag instances efficiently.
     * This removes tag instances only. It doesn't check to see if it is the last use of a tag.
     *
     * Nothing is done when $instances is empty.
     *
     * @param array $instances An array of tag instance objects with the addition of the tag's
     *        name and rawname properties (used for recording a delete event).
     */
    public function bulk_delete_instances($instances) {
        global $DB;

        // Building an IN clause from an empty list is rejected by get_in_or_equal(), and
        // there is nothing to delete anyway.
        if (empty($instances)) {
            return;
        }

        $instanceids = array();
        foreach ($instances as $instance) {
            $instanceids[] = $instance->id;
        }

        // This is a multi db compatible method of creating the correct sql when using the 'IN' value.
        // $insql is the sql statement, $params are the id numbers.
        list($insql, $params) = $DB->get_in_or_equal($instanceids);
        $sql = 'id ' . $insql;
        $DB->delete_records_select('tag_instance', $sql, $params);

        // Now go through and record each tag individually with the event system.
        foreach ($instances as $instance) {
            // Trigger tag removed event (i.e. The tag instance has been removed).
            \core\event\tag_removed::create_from_tag_instance($instance, $instance->name,
                    $instance->rawname, true)->trigger();
        }
    }
}
272