1<?php
2/***********************************************************
3 Copyright (C) 2019 Siemens AG
4 Author: Gaurav Mishra <mishra.gaurav@siemens.com>
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 version 2 as published by the Free Software Foundation.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18 ***********************************************************/
19
20use Fossology\Lib\Db\DbManager;
21
22/**
23 * @file
24 * @brief Migrate DB from release 3.5.0 to 3.6.0 with new column for decision
25 * tables.
26 */
27
28
29/**
30 * @brief calculate number of records and return offset
31 *
32 * The function gets count of the values from database and check if there are
33 * more than 10,000 records then return 10,000, otherwise return number of
34 * records.
35 * @param DbManager $dbManager
36 * @param string $tableName
37 */
38function calculateNumberOfRecordsToBeProcessed($dbManager, $tableName, $columnName)
39{
40  $sql = "SELECT count(*) AS cnt FROM $tableName WHERE $tableName.$columnName is NULL;";
41  $totalPfile = $dbManager->getSingleRow($sql, [], __METHOD__ .
42    ".calculateNumberOfRecordsToBeProcesses" . $tableName);
43  $count = 0;
44  if ($totalPfile['cnt'] > 10000) {
45    $count = 10000;
46  } else {
47    $count = $totalPfile['cnt'];
48  }
49  return array($count, $totalPfile['cnt']);
50}
51
52/**
53 * @brief Removes duplicate decisions based on same textfinding for same pfile
54 *
55 * The function first tries to remove all duplicate decisions from deactivated
56 * list then from active list.
57 * @param DbManager $dbManager
58 * @param string $tableName
59 */
60function cleanDecisionTable($dbManager, $tableName)
61{
62  if($dbManager == null){
63    echo "No connection object passed!\n";
64    return false;
65  }
66
67  echo "*** Removing any duplicate manual findings from $tableName ***\n";
68  // First remove only duplicate deactivated statements
69  $sql = "
70 DELETE FROM $tableName
71  WHERE " . $tableName . "_pk IN (SELECT " . $tableName . "_pk
72   FROM (SELECT " . $tableName . "_pk, is_enabled,
73    ROW_NUMBER() OVER (PARTITION BY textfinding, pfile_fk
74                       ORDER BY " . $tableName . "_pk) AS rnum
75   FROM $tableName) AS a
76  WHERE a.is_enabled = FALSE AND a.rnum > 1);";
77
78  $dbManager->begin();
79  $dbManager->queryOnce($sql);
80  $dbManager->commit();
81
82  // Then remove any active duplicate statements
83  $sql = "
84 DELETE FROM $tableName
85  WHERE " . $tableName . "_pk IN (SELECT " . $tableName . "_pk
86   FROM (SELECT " . $tableName . "_pk,
87    ROW_NUMBER() OVER (PARTITION BY textfinding, pfile_fk
88                       ORDER BY " . $tableName . "_pk) AS rnum
89   FROM $tableName) AS a
90  WHERE a.rnum > 1);";
91
92  $dbManager->begin();
93  $dbManager->queryOnce($sql);
94  $dbManager->commit();
95}
96
97/**
98 * @brief Update the hash column of the table with value from textfinding.
99 * @param DbManager $dbManager
100 * @param string $tableName
101 * @return integer Number of entries updated
102 */
103function updateHash($dbManager, $tableName)
104{
105  $totalCount = 0;
106  if($dbManager == null){
107    echo "No connection object passed!\n";
108    return false;
109  }
110  if(DB_TableExists($tableName) != 1) {
111    // Table does not exists (migrating from old version)
112    echo "Table $tableName does not exists, not updating!\n";
113    return 0;
114  }
115
116  $numberOfRecords = calculateNumberOfRecordsToBeProcessed($dbManager, $tableName, "hash");
117  $numberOfRecords = $numberOfRecords[0];
118  while (!empty($numberOfRecords)) {
119    $sql = "SELECT " . $tableName . "_pk AS id, textfinding " .
120      "FROM $tableName WHERE hash IS NULL LIMIT $numberOfRecords;";
121    $statement = __METHOD__ . ".getNullHash.$tableName.$numberOfRecords";
122    $rows = $dbManager->getRows($sql, [], $statement);
123
124    $sql = "UPDATE $tableName AS m " .
125      "SET hash = c.sha256 FROM (VALUES ";
126    $fileShaList = [];
127    foreach ($rows as $row) {
128      $fileShaList[] = "(" . $row["id"] . ",'" .
129      hash('sha256', $row['textfinding']) . "')";
130    }
131    $sql .= join(",", $fileShaList);
132    $sql .= ") AS c(id, sha256) WHERE c.id = m.$tableName" . "_pk;";
133    $dbManager->begin();
134    $dbManager->queryOnce($sql, __METHOD__ . ".update.$tableName.hash");
135    $dbManager->commit();
136
137    $totalCount = $totalCount + $numberOfRecords;
138    $numberOfRecords = calculateNumberOfRecordsToBeProcessed($dbManager, $tableName, "hash");
139    $numberOfRecords = $numberOfRecords[0];
140  }
141  return $totalCount;
142}
143
144
145/**
146 * @brief Update the sha256 column of the table with value from textfinding.
147 * @param DbManager $dbManager
148 * @param string $tableName
149 * @return integer Number of entries updated
150 */
151function updateSHA256($dbManager, $tableName)
152{
153  $totalCount = 0;
154  if ($dbManager == null) {
155    echo "No connection object passed!\n";
156    return false;
157  }
158
159  if (DB_TableExists($tableName) != 1) {
160    // Table does not exists (migrating from old version)
161    echo "Table $tableName does not exists, not updating!\n";
162    return 0;
163  }
164
165  $records = calculateNumberOfRecordsToBeProcessed($dbManager, $tableName, $tableName."_sha256");
166  $lastCount = $records[1];
167  $numberOfRecords = $records[0];
168  while (!empty($numberOfRecords)) {
169    $sql = "SELECT ".$tableName.".".$tableName . "_pk AS id " .
170      "FROM $tableName WHERE $tableName." . $tableName . "_sha256 is NULL " .
171      "LIMIT $numberOfRecords";
172    $statement = __METHOD__ . ".getNullSHA256.$tableName.$numberOfRecords";
173    $rows = $dbManager->getRows($sql, [], $statement);
174
175    $sql = "UPDATE $tableName AS m " .
176    "SET " . $tableName . "_sha256 = c.sha256 " .
177    "FROM (VALUES ";
178    $fileShaList = [];
179    foreach ($rows as $row) {
180      $oneRow = "(" . $row["id"];
181      $filePath = RepPath($row['id'], "files");
182      if (file_exists($filePath)) {
183        $hash = strtoupper(hash_file('sha256', $filePath));
184        $oneRow .= ",'$hash')";
185      } else {
186        $oneRow .= ",null)";
187      }
188      $fileShaList[] = $oneRow;
189    }
190    $sql .= join(",", $fileShaList);
191    $sql .= ") AS c(id, sha256) WHERE c.id = m.$tableName" . "_pk;";
192    $dbManager->begin();
193    $dbManager->queryOnce($sql, __METHOD__ . ".updatePfile_SHA256");
194    $dbManager->commit();
195
196    $totalCount = $totalCount + $numberOfRecords;
197    echo "* $totalCount pfile records updated *\n";
198
199    $records = calculateNumberOfRecordsToBeProcessed($dbManager, $tableName, $tableName."_sha256");
200    if ($lastCount == $records[1]) {
201      // NULL files in last loop and this loop are same.
202      // All remaining records does not exist in FS
203      // Prevent from infinite loop
204      break;
205    }
206    $lastCount = $records[1];
207    $numberOfRecords = $records[0];
208  }
209  return $totalCount;
210}
211
212/**
213 * @brief Check if the given column contains only upper case entries
214 * @param $dbManager DbManager
215 * @param $tableName Table to check
216 * @param $colName   Column of the table to check
217 * @param $where     Additional where clause conditions
218 */
219function isColumnUpperCase($dbManager, $tableName, $colName, $where)
220{
221  if (!empty($where)) {
222    $where = "AND $where";
223  }
224  $sql = "SELECT count(*) AS cnt FROM $tableName " .
225    "WHERE $colName != UPPER($colName) $where;";
226  $row = $dbManager->getSingleRow($sql, [], __METHOD__ .
227    ".checkLowerCaseIn.$tableName".strlen($where));
228  return ($row["cnt"] == 0);
229}
230
231function updatePfileSha256($dbManager, $force = false)
232{
233  if (! isColumnUpperCase($dbManager, "pfile", "pfile_sha256", "pfile_sha256 IS NOT NULL")) {
234    // Uppercase already existing hashes
235    $sql = "UPDATE pfile SET pfile_sha256 = UPPER(pfile_sha256);";
236    $statement = __METHOD__ . ".updatePfileSHA256ToUpper";
237    $dbManager->begin();
238    $dbManager->queryOnce($sql, $statement);
239    $dbManager->commit();
240  }
241  $totalPfile = 0;
242  $totalPfile = calculateNumberOfRecordsToBeProcessed($dbManager, "pfile", "pfile_sha256");
243  if (!empty($totalPfile)) {
244    $totalPfile = $totalPfile[1];
245  } else {
246    $totalPfile = 0;
247  }
248
249  if ($totalPfile == 0) {
250    // Migration not required
251    return 0;
252  }
253  $envYes = getenv('FOSSPFILE');
254  if (!$force) {
255    $force = !empty($envYes);
256  }
257
258  $timePerJob = 0.00905919;
259  $totalTime = floatval($totalPfile) * $timePerJob;
260  $minutes = intval($totalTime / 60.0);
261  $hours = floor($minutes / 60);
262  $actualMinutes = $minutes - ($hours * 60);
263
264  echo "*** Calculation of SHA256 for pfiles will require approx $hours hrs " .
265    "$actualMinutes mins. ***\n";
266
267  if (!$force && $minutes > 45) {
268    $REDCOLOR = "\033[0;31m";
269    $NOCOLOR = "\033[0m";
270    echo "\n*********************************************************" .
271      "***********************\n";
272    echo "*** " . $REDCOLOR . "Error, script will take too much time. Not " .
273      "calculating SHA256 for pfile." . $NOCOLOR . " ***\n";
274    echo "*** Either rerun the fo-postinstall with \"--force-pfile\" flag " .
275      "or set         ***\n" .
276      "*** \"FOSSPFILE=1\" in environment or run script at                " .
277      "            ***\n";
278    echo "*** \"" . dirname(__FILE__) .
279      "/dbmigrate_pfile_calculate_sha256.php\" to continue as a separate process ***\n";
280    echo "*********************************************************" .
281      "***********************\n";
282    return 0;
283  }
284
285  try {
286    echo "*** Updating the sha256 values of pfiles ***\n";
287    $countPfile = updateSHA256($dbManager, "pfile");
288    echo "*** Updated sha256 of $countPfile/$totalPfile records of pfile ***\n";
289  } catch (Exception $e) {
290    echo "*** Something went wrong. Try again! ***\n";
291    $dbManager->rollback();
292    return -1;
293  }
294}
295
296/**
297 * Migration from FOSSology 3.5.0 to 3.6.0
298 * @param DbManager $dbManager
299 * @param boolean $force Set true to force run the script.
300 */
301function migrate_35_36($dbManager, $force = false)
302{
303  $total = 0;
304  $tables = [
305    "copyright_decision",
306    "ecc_decision",
307    "keyword_decision"
308  ];
309  if (!$force) {
310    $sql = "WITH decision_tables AS(".
311    "  SELECT count(*) AS cnt FROM $tables[0] WHERE hash IS NULL" .
312    "  UNION" .
313    "  SELECT count(*) AS cnt FROM $tables[1] WHERE hash IS NULL" .
314    "  UNION" .
315    "  SELECT count(*) AS cnt FROM $tables[2] WHERE hash IS NULL" .
316    ") SELECT SUM(cnt) AS total FROM decision_tables;";
317    $total = $dbManager->getSingleRow($sql, [], __METHOD__ .
318      ".checkIfMigrationDone");
319    $total = intval($total["total"]);
320
321    if ($total == 0) {
322      // Migration not required
323      return;
324    }
325  }
326
327  try {
328    $count = 0;
329    // Updating the copyright/ecc/keyword findings
330    echo "*** Updating the hash values of manual copyright/ecc/keyword findings ***\n";
331
332    foreach ($tables as $table) {
333      cleanDecisionTable($dbManager, $table);
334      $count += updateHash($dbManager, $table);
335    }
336
337    echo "*** Updated hash of $count/$total manual copyright/ecc/keyword findings ***\n";
338  } catch (Exception $e) {
339    echo "*** Something went wrong. Try running postinstall again! ***\n";
340    $dbManager->rollback();
341  }
342}
343