1<?php 2/* 3Copyright (C) 2014-2015, Siemens AG 4 5This program is free software; you can redistribute it and/or 6modify it under the terms of the GNU General Public License 7version 2 as published by the Free Software Foundation. 8 9This program is distributed in the hope that it will be useful, 10but WITHOUT ANY WARRANTY; without even the implied warranty of 11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12GNU General Public License for more details. 13 14You should have received a copy of the GNU General Public License along 15with this program; if not, write to the Free Software Foundation, Inc., 1651 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17*/ 18 19namespace Fossology\Lib\Application; 20 21use Fossology\Lib\BusinessRules\LicenseMap; 22use Fossology\Lib\Db\DbManager; 23use Fossology\Lib\Util\ArrayOperation; 24use Fossology\Lib\Dao\UserDao; 25 26/** 27 * @file 28 * @brief Import licenses from CSV 29 */ 30 31/** 32 * @class LicenseCsvImport 33 * @brief Import licenses from CSV 34 */ 35class LicenseCsvImport 36{ 37 /** @var DbManager $dbManager 38 * DB manager to use */ 39 protected $dbManager; 40 /** @var UserDao $userDao 41 * User DAO to use */ 42 protected $userDao; 43 /** @var string $delimiter 44 * Delimiter used in CSV */ 45 protected $delimiter = ','; 46 /** @var string $enclosure 47 * Enclosure used in CSV */ 48 protected $enclosure = '"'; 49 /** @var null|array $headrow 50 * Header of CSV */ 51 protected $headrow = null; 52 /** @var array $nkMap 53 * Map based on license shortname */ 54 protected $nkMap = array(); 55 /** @var array $mdkMap 56 * Map based on license text MD5 */ 57 protected $mdkMap = array(); 58 /** @var array $alias 59 * Alias for headers */ 60 protected $alias = array( 61 'shortname'=>array('shortname','Short Name'), 62 'fullname'=>array('fullname','Long Name'), 63 'text'=>array('text','Full Text'), 64 'parent_shortname'=>array('parent_shortname','Decider Short Name'), 65 'report_shortname'=>array('report_shortname','Regular License Text Short Name'), 66 'url'=>array('url','URL'), 67 'notes'=>array('notes'), 68 'source'=>array('source','Foreign ID'), 69 'risk'=>array('risk','risk_level'), 70 'group'=>array('group','License group'), 71 'obligations'=>array('obligations','License obligations') 72 ); 73 74 /** 75 * Constructor 76 * @param DbManager $dbManager DB manager to use 77 * @param UserDao $userDao User Dao to use 78 */ 79 public function __construct(DbManager $dbManager, UserDao $userDao) 80 { 81 $this->dbManager = $dbManager; 82 $this->userDao = $userDao; 83 } 84 85 /** 86 * @brief Update the delimiter 87 * @param string $delimiter New delimiter to use. 88 */ 89 public function setDelimiter($delimiter=',') 90 { 91 $this->delimiter = substr($delimiter,0,1); 92 } 93 94 /** 95 * @brief Update the enclosure 96 * @param string $enclosure New enclosure to use. 97 */ 98 public function setEnclosure($enclosure='"') 99 { 100 $this->enclosure = substr($enclosure,0,1); 101 } 102 103 /** 104 * @brief Read the CSV line by line and import it. 105 * @param string $filename Location of the CSV file. 106 * @return string message Error message, if any. Otherwise 107 * `Read csv: <count> licenses` on success. 108 */ 109 public function handleFile($filename) 110 { 111 if (!is_file($filename) || ($handle = fopen($filename, 'r')) === false) { 112 return _('Internal error'); 113 } 114 $cnt = -1; 115 $msg = ''; 116 try { 117 while (($row = fgetcsv($handle,0,$this->delimiter,$this->enclosure)) !== false) { 118 $log = $this->handleCsv($row); 119 if (!empty($log)) { 120 $msg .= "$log\n"; 121 } 122 $cnt++; 123 } 124 $msg .= _('Read csv').(": $cnt ")._('licenses'); 125 } catch(\Exception $e) { 126 fclose($handle); 127 return $msg .= _('Error while parsing file').': '.$e->getMessage(); 128 } 129 fclose($handle); 130 return $msg; 131 } 132 133 /** 134 * Handle a single row read from the CSV. If headrow is not set, then handle 135 * current row as head row. 136 * @param array $row Single row from CSV 137 * @return string $log Log messages 138 */ 139 private function handleCsv($row) 140 { 141 if ($this->headrow === null) { 142 $this->headrow = $this->handleHeadCsv($row); 143 return 'head okay'; 144 } 145 146 $mRow = array(); 147 foreach (array('shortname','fullname','text') as $needle) { 148 $mRow[$needle] = $row[$this->headrow[$needle]]; 149 } 150 foreach (array('parent_shortname' => null, 'report_shortname' => null, 151 'url' => '', 'notes' => '', 'source' => '', 'risk' => 0, 152 'group' => null) as $optNeedle=>$defaultValue) { 153 $mRow[$optNeedle] = $defaultValue; 154 if ($this->headrow[$optNeedle]!==false && array_key_exists($this->headrow[$optNeedle], $row)) { 155 $mRow[$optNeedle] = $row[$this->headrow[$optNeedle]]; 156 } 157 } 158 159 return $this->handleCsvLicense($mRow); 160 } 161 162 /** 163 * @brief Handle a row as head row. 164 * @param array $row Head row to be handled. 165 * @throws \Exception 166 * @return boolean[]|mixed[] Parsed head row. 167 */ 168 private function handleHeadCsv($row) 169 { 170 $headrow = array(); 171 foreach (array('shortname','fullname','text') as $needle) { 172 $col = ArrayOperation::multiSearch($this->alias[$needle], $row); 173 if (false === $col) { 174 throw new \Exception("Undetermined position of $needle"); 175 } 176 $headrow[$needle] = $col; 177 } 178 foreach (array('parent_shortname', 'report_shortname', 'url', 'notes', 179 'source', 'risk', 'group') as $optNeedle) { 180 $headrow[$optNeedle] = ArrayOperation::multiSearch($this->alias[$optNeedle], $row); 181 } 182 return $headrow; 183 } 184 185 /** 186 * @brief Update the license info in the DB. 187 * @param array $row Row with new values. 188 * @param array $rfPk Matched license ID. 189 * @return string Log messages. 190 */ 191 private function updateLicense($row, $rfPk) 192 { 193 $stmt = __METHOD__ . '.getOldLicense'; 194 $oldLicense = $this->dbManager->getSingleRow('SELECT ' . 195 'rf_shortname, rf_fullname, rf_text, rf_url, rf_notes, rf_source, rf_risk ' . 196 'FROM license_ref WHERE rf_pk = $1', array($rfPk), $stmt); 197 198 $stmt = __METHOD__ . '.getOldMapping'; 199 $sql = 'SELECT rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;'; 200 $oldParent = null; 201 $oldParentRow = $this->dbManager->getSingleRow($sql, array($rfPk, 202 LicenseMap::CONCLUSION), $stmt); 203 if (!empty($oldParentRow)) { 204 $oldParent = $oldParentRow['rf_parent']; 205 } 206 $oldReport = null; 207 $oldReportRow = $this->dbManager->getSingleRow($sql, array($rfPk, 208 LicenseMap::REPORT), $stmt); 209 if (!empty($oldReportRow)) { 210 $oldReport = $oldReportRow['rf_parent']; 211 } 212 213 $newParent = null; 214 $newParent = ($row['parent_shortname'] == null) ? null : 215 $this->getKeyFromShortname($row['parent_shortname']); 216 217 $newReport = null; 218 $newReport = ($row['report_shortname'] == null) ? null : 219 $this->getKeyFromShortname($row['report_shortname']); 220 221 $log = "License '$row[shortname]' already exists in DB (id = $rfPk)"; 222 $stmt = __METHOD__ . '.updateLicense'; 223 $sql = "UPDATE license_ref SET "; 224 if (! empty($row['group'])) { 225 $sql = "UPDATE license_candidate SET "; 226 } 227 $extraParams = array(); 228 $param = array($rfPk); 229 if (!empty($row['fullname']) && $row['fullname'] != $oldLicense['rf_fullname']) { 230 $param[] = $row['fullname']; 231 $stmt .= '.fullN'; 232 $extraParams[] = "rf_fullname=$" . count($param); 233 $log .= ", updated fullname"; 234 } 235 if (!empty($row['text']) && $row['text'] != $oldLicense['rf_text']) { 236 $param[] = $row['text']; 237 $stmt .= '.text'; 238 $extraParams[] = "rf_text=$" . count($param) . ",rf_md5=md5($" . 239 count($param) . ")"; 240 $log .= ", updated text"; 241 } 242 if (!empty($row['url']) && $row['url'] != $oldLicense['rf_url']) { 243 $param[] = $row['url']; 244 $stmt .= '.url'; 245 $extraParams[] = "rf_url=$" . count($param); 246 $log .= ", updated URL"; 247 } 248 if (!empty($row['notes']) && $row['notes'] != $oldLicense['rf_notes']) { 249 $param[] = $row['notes']; 250 $stmt .= '.notes'; 251 $extraParams[] = "rf_notes=$" . count($param); 252 $log .= ", updated notes"; 253 } 254 if (!empty($row['source']) && $row['source'] != $oldLicense['rf_source']) { 255 $param[] = $row['source']; 256 $stmt .= '.updSource'; 257 $extraParams[] = "rf_source=$".count($param); 258 $log .= ', updated the source'; 259 } 260 if (!empty($row['risk']) && $row['risk'] != $oldLicense['rf_risk']) { 261 $param[] = $row['risk']; 262 $stmt .= '.updRisk'; 263 $extraParams[] = "rf_risk=$".count($param); 264 $log .= ', updated the risk level'; 265 } 266 if (count($param) > 1) { 267 $sql .= join(",", $extraParams); 268 $sql .= " WHERE rf_pk=$1;"; 269 $this->dbManager->getSingleRow($sql, $param, $stmt); 270 $this->mdkMap[md5($row['text'])] = $rfPk; 271 } 272 273 if (($oldParent != $newParent) && $this->setMap($newParent, $rfPk, LicenseMap::CONCLUSION)) { 274 $log .= " with conclusion '$row[parent_shortname]'"; 275 } 276 if (($oldReport != $newReport) && $this->setMap($newReport, $rfPk, LicenseMap::REPORT)) { 277 $log .= " reporting '$row[report_shortname]'"; 278 } 279 return $log; 280 } 281 282 /** 283 * @brief Handle a single row from CSV. 284 * 285 * The function checks if the license text hash is already in the DB, then 286 * updates it. Otherwise inserts new row in the DB. 287 * @param array $row CSV row to be inserted. 288 * @return string Log messages. 289 */ 290 private function handleCsvLicense($row) 291 { 292 if (empty($row['risk'])) { 293 $row['risk'] = 0; 294 } 295 $rfPk = $this->getKeyFromShortname($row['shortname'], $row['group']); 296 $md5Match = $this->getKeyFromMd5($row['text']); 297 298 // If shortname exists, does not collide with other texts and is not 299 // candidate 300 if ($rfPk !== false) { 301 if (! empty($row['group']) || ($md5Match == $rfPk || $md5Match === false)) { 302 return $this->updateLicense($row, $rfPk); 303 } else { 304 return "Error: MD5 checksum of '" . $row['shortname'] . 305 "' collides with license id=$md5Match"; 306 } 307 } 308 if ($md5Match !== false && empty($row['group'])) { 309 return "Error: MD5 checksum of '" . $row['shortname'] . 310 "' collides with license id=$md5Match"; 311 } 312 313 $return = ""; 314 if (!empty($row['group'])) { 315 $return = $this->insertNewLicense($row, "license_candidate"); 316 } else { 317 $return = $this->insertNewLicense($row, "license_ref"); 318 } 319 return $return; 320 } 321 322 /** 323 * @brief Insert in `license_map` table if the license conclusion is 324 * non-trivial. 325 * 326 * If the from and to are not same and from exists in database, then the 327 * conclusion is non-trivial. 328 * @param string $fromName Parent license name 329 * @param string $toName License name 330 * @param string $usage Usage of the license 331 * @return boolean True if license is non-trivial, false otherwise. 332 */ 333 private function insertMapIfNontrivial($fromName,$toName,$usage) 334 { 335 $isNontrivial = ($fromName!==null && $fromName!=$toName && $this->getKeyFromShortname($fromName)!==false); 336 if ($isNontrivial) { 337 $this->dbManager->insertTableRow('license_map', 338 array('rf_fk'=>$this->getKeyFromShortname($toName), 339 'rf_parent'=>$this->getKeyFromShortname($fromName), 340 'usage'=> $usage)); 341 } 342 return $isNontrivial; 343 } 344 345 /** 346 * @brief Get the license id using license shortname from DB or nkMap. 347 * @param string $shortname Shortname of the license. 348 * @return int License id 349 */ 350 private function getKeyFromShortname($shortname, $groupFk = null) 351 { 352 $keyName = $shortname; 353 $tableName = "license_ref"; 354 $addCondition = ""; 355 $statement = __METHOD__ . ".getId"; 356 $params = array($shortname); 357 358 if ($groupFk != null) { 359 $keyName .= $groupFk; 360 $tableName = "license_candidate"; 361 $addCondition = "AND group_fk = $2"; 362 $statement .= ".candidate"; 363 $params[] = $this->userDao->getGroupIdByName($groupFk); 364 } 365 $sql = "SELECT rf_pk FROM ONLY $tableName WHERE rf_shortname = $1 $addCondition;"; 366 if (array_key_exists($keyName, $this->nkMap)) { 367 return $this->nkMap[$keyName]; 368 } 369 $row = $this->dbManager->getSingleRow($sql, $params, $statement); 370 $this->nkMap[$keyName] = ($row===false) ? false : $row['rf_pk']; 371 return $this->nkMap[$keyName]; 372 } 373 374 /** 375 * Get the license id using license text's checksum from DB or mdkMap. 376 * @param string $licenseText License text 377 * @return integer License id 378 */ 379 private function getKeyFromMd5($licenseText) 380 { 381 $md5 = md5($licenseText); 382 if (array_key_exists($md5, $this->mdkMap)) { 383 return $this->mdkMap[$md5]; 384 } 385 $row = $this->dbManager->getSingleRow("SELECT rf_pk " . 386 "FROM ONLY license_ref WHERE rf_md5=md5($1)", 387 array($licenseText)); 388 $this->mdkMap[$md5] = (empty($row)) ? false : $row['rf_pk']; 389 return $this->mdkMap[$md5]; 390 } 391 392 /** 393 * @brief Update license mappings 394 * 395 * First check if the mapping already exists for the license, then update it. 396 * If the mapping does not exists, then insert it. 397 * @param integer $from The new mapping license 398 * @param integer $to The license to be updated 399 * @param integer $usage The usage 400 * @return boolean False if mapping could not be updated or $from is empty. 401 */ 402 private function setMap($from, $to, $usage) 403 { 404 $return = false; 405 if (!empty($from)) { 406 $sql = "SELECT license_map_pk, rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;"; 407 $statement = __METHOD__ . ".getCurrentMapping"; 408 $row = $this->dbManager->getSingleRow($sql, array($to, $usage), $statement); 409 if (!empty($row) && $row['rf_parent'] != $from) { 410 $this->dbManager->updateTableRow("license_map", array( 411 'rf_fk' => $to, 412 'rf_parent' => $from, 413 'usage' => $usage 414 ), 'license_map_pk', $row['license_map_pk']); 415 $return = true; 416 } elseif (empty($row)) { 417 $this->dbManager->insertTableRow('license_map', array( 418 'rf_fk' => $to, 419 'rf_parent' => $from, 420 'usage' => $usage 421 )); 422 $return = true; 423 } 424 } 425 return $return; 426 } 427 428 /** 429 * @brief Insert a new license in DB 430 * 431 * Creates a new main license/candidate license based on table name sent 432 * and if the required group exists in DB. 433 * @param array $row Rows comming from CSV 434 * @param string $tableName Table where this new license should go to 435 * @return string Log messages 436 */ 437 private function insertNewLicense($row, $tableName = "license_ref") 438 { 439 $stmtInsert = __METHOD__ . '.insert.' . $tableName; 440 $columns = array( 441 "rf_shortname" => $row['shortname'], 442 "rf_fullname" => $row['fullname'], 443 "rf_text" => $row['text'], 444 "rf_md5" => md5($row['text']), 445 "rf_detector_type" => 1, 446 "rf_url" => $row['url'], 447 "rf_notes" => $row['notes'], 448 "rf_source" => $row['source'], 449 "rf_risk" => $row['risk'] 450 ); 451 452 $as = ""; 453 if ($tableName == "license_candidate") { 454 $groupId = $this->userDao->getGroupIdByName($row['group']); 455 if (empty($groupId)) { 456 return "Error: Unable to insert candidate license " . $row['shortname'] . 457 " as group " . $row['group'] . " does not exist"; 458 } 459 $columns["group_fk"] = $groupId; 460 $columns["marydone"] = $this->dbManager->booleanToDb(true); 461 $as = " as candidate license under group " . $row["group"]; 462 } 463 464 $newPk = $this->dbManager->insertTableRow($tableName, $columns, $stmtInsert, 'rf_pk'); 465 466 if ($tableName == "license_candidate") { 467 $this->nkMap[$row['shortname'].$row['group']] = $newPk; 468 } else { 469 $this->nkMap[$row['shortname']] = $newPk; 470 } 471 $this->mdkMap[md5($row['text'])] = $newPk; 472 $return = "Inserted '$row[shortname]' in DB" . $as; 473 474 if ($this->insertMapIfNontrivial($row['parent_shortname'], $row['shortname'], LicenseMap::CONCLUSION)) { 475 $return .= " with conclusion '$row[parent_shortname]'"; 476 } 477 if ($this->insertMapIfNontrivial($row['report_shortname'], $row['shortname'], LicenseMap::REPORT)) { 478 $return .= " reporting '$row[report_shortname]'"; 479 } 480 return $return; 481 } 482} 483