1<?php
2/*
3Copyright (C) 2014-2015, Siemens AG
4
5This program is free software; you can redistribute it and/or
6modify it under the terms of the GNU General Public License
7version 2 as published by the Free Software Foundation.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along
15with this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17*/
18
19namespace Fossology\Lib\Application;
20
21use Fossology\Lib\BusinessRules\LicenseMap;
22use Fossology\Lib\Db\DbManager;
23use Fossology\Lib\Util\ArrayOperation;
24use Fossology\Lib\Dao\UserDao;
25
26/**
27 * @file
28 * @brief Import licenses from CSV
29 */
30
31/**
32 * @class LicenseCsvImport
33 * @brief Import licenses from CSV
34 */
35class LicenseCsvImport
36{
37  /** @var DbManager $dbManager
38   * DB manager to use */
39  protected $dbManager;
40  /** @var UserDao $userDao
41   * User DAO to use */
42  protected $userDao;
43  /** @var string $delimiter
44   * Delimiter used in CSV */
45  protected $delimiter = ',';
46  /** @var string $enclosure
47   * Enclosure used in CSV */
48  protected $enclosure = '"';
49  /** @var null|array $headrow
50   * Header of CSV */
51  protected $headrow = null;
52  /** @var array $nkMap
53   * Map based on license shortname */
54  protected $nkMap = array();
55  /** @var array $mdkMap
56   * Map based on license text MD5 */
57  protected $mdkMap = array();
58  /** @var array $alias
59   * Alias for headers */
60  protected $alias = array(
61      'shortname'=>array('shortname','Short Name'),
62      'fullname'=>array('fullname','Long Name'),
63      'text'=>array('text','Full Text'),
64      'parent_shortname'=>array('parent_shortname','Decider Short Name'),
65      'report_shortname'=>array('report_shortname','Regular License Text Short Name'),
66      'url'=>array('url','URL'),
67      'notes'=>array('notes'),
68      'source'=>array('source','Foreign ID'),
69      'risk'=>array('risk','risk_level'),
70      'group'=>array('group','License group'),
71      'obligations'=>array('obligations','License obligations')
72      );
73
74  /**
75   * Constructor
76   * @param DbManager $dbManager DB manager to use
77   * @param UserDao $userDao     User Dao to use
78   */
79  public function __construct(DbManager $dbManager, UserDao $userDao)
80  {
81    $this->dbManager = $dbManager;
82    $this->userDao = $userDao;
83  }
84
85  /**
86   * @brief Update the delimiter
87   * @param string $delimiter New delimiter to use.
88   */
89  public function setDelimiter($delimiter=',')
90  {
91    $this->delimiter = substr($delimiter,0,1);
92  }
93
94  /**
95   * @brief Update the enclosure
96   * @param string $enclosure New enclosure to use.
97   */
98  public function setEnclosure($enclosure='"')
99  {
100    $this->enclosure = substr($enclosure,0,1);
101  }
102
103  /**
104   * @brief Read the CSV line by line and import it.
105   * @param string $filename Location of the CSV file.
106   * @return string message Error message, if any. Otherwise
107   *         `Read csv: <count> licenses` on success.
108   */
109  public function handleFile($filename)
110  {
111    if (!is_file($filename) || ($handle = fopen($filename, 'r')) === false) {
112      return _('Internal error');
113    }
114    $cnt = -1;
115    $msg = '';
116    try {
117      while (($row = fgetcsv($handle,0,$this->delimiter,$this->enclosure)) !== false) {
118        $log = $this->handleCsv($row);
119        if (!empty($log)) {
120          $msg .= "$log\n";
121        }
122        $cnt++;
123      }
124      $msg .= _('Read csv').(": $cnt ")._('licenses');
125    } catch(\Exception $e) {
126      fclose($handle);
127      return $msg .= _('Error while parsing file').': '.$e->getMessage();
128    }
129    fclose($handle);
130    return $msg;
131  }
132
133  /**
134   * Handle a single row read from the CSV. If headrow is not set, then handle
135   * current row as head row.
136   * @param array $row   Single row from CSV
137   * @return string $log Log messages
138   */
139  private function handleCsv($row)
140  {
141    if ($this->headrow === null) {
142      $this->headrow = $this->handleHeadCsv($row);
143      return 'head okay';
144    }
145
146    $mRow = array();
147    foreach (array('shortname','fullname','text') as $needle) {
148      $mRow[$needle] = $row[$this->headrow[$needle]];
149    }
150    foreach (array('parent_shortname' => null, 'report_shortname' => null,
151      'url' => '', 'notes' => '', 'source' => '', 'risk' => 0,
152      'group' => null) as $optNeedle=>$defaultValue) {
153      $mRow[$optNeedle] = $defaultValue;
154      if ($this->headrow[$optNeedle]!==false && array_key_exists($this->headrow[$optNeedle], $row)) {
155        $mRow[$optNeedle] = $row[$this->headrow[$optNeedle]];
156      }
157    }
158
159    return $this->handleCsvLicense($mRow);
160  }
161
162  /**
163   * @brief Handle a row as head row.
164   * @param array $row  Head row to be handled.
165   * @throws \Exception
166   * @return boolean[]|mixed[] Parsed head row.
167   */
168  private function handleHeadCsv($row)
169  {
170    $headrow = array();
171    foreach (array('shortname','fullname','text') as $needle) {
172      $col = ArrayOperation::multiSearch($this->alias[$needle], $row);
173      if (false === $col) {
174        throw new \Exception("Undetermined position of $needle");
175      }
176      $headrow[$needle] = $col;
177    }
178    foreach (array('parent_shortname', 'report_shortname', 'url', 'notes',
179      'source', 'risk', 'group') as $optNeedle) {
180      $headrow[$optNeedle] = ArrayOperation::multiSearch($this->alias[$optNeedle], $row);
181    }
182    return $headrow;
183  }
184
185  /**
186   * @brief Update the license info in the DB.
187   * @param array $row  Row with new values.
188   * @param array $rfPk Matched license ID.
189   * @return string Log messages.
190   */
191  private function updateLicense($row, $rfPk)
192  {
193    $stmt = __METHOD__ . '.getOldLicense';
194    $oldLicense = $this->dbManager->getSingleRow('SELECT ' .
195      'rf_shortname, rf_fullname, rf_text, rf_url, rf_notes, rf_source, rf_risk ' .
196      'FROM license_ref WHERE rf_pk = $1', array($rfPk), $stmt);
197
198    $stmt = __METHOD__ . '.getOldMapping';
199    $sql = 'SELECT rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;';
200    $oldParent = null;
201    $oldParentRow = $this->dbManager->getSingleRow($sql, array($rfPk,
202      LicenseMap::CONCLUSION), $stmt);
203    if (!empty($oldParentRow)) {
204      $oldParent = $oldParentRow['rf_parent'];
205    }
206    $oldReport = null;
207    $oldReportRow = $this->dbManager->getSingleRow($sql, array($rfPk,
208      LicenseMap::REPORT), $stmt);
209    if (!empty($oldReportRow)) {
210      $oldReport = $oldReportRow['rf_parent'];
211    }
212
213    $newParent = null;
214    $newParent = ($row['parent_shortname'] == null) ? null :
215      $this->getKeyFromShortname($row['parent_shortname']);
216
217    $newReport = null;
218    $newReport = ($row['report_shortname'] == null) ? null :
219      $this->getKeyFromShortname($row['report_shortname']);
220
221    $log = "License '$row[shortname]' already exists in DB (id = $rfPk)";
222    $stmt = __METHOD__ . '.updateLicense';
223    $sql = "UPDATE license_ref SET ";
224    if (! empty($row['group'])) {
225      $sql = "UPDATE license_candidate SET ";
226    }
227    $extraParams = array();
228    $param = array($rfPk);
229    if (!empty($row['fullname']) && $row['fullname'] != $oldLicense['rf_fullname']) {
230      $param[] = $row['fullname'];
231      $stmt .= '.fullN';
232      $extraParams[] = "rf_fullname=$" . count($param);
233      $log .= ", updated fullname";
234    }
235    if (!empty($row['text']) && $row['text'] != $oldLicense['rf_text']) {
236      $param[] = $row['text'];
237      $stmt .= '.text';
238      $extraParams[] = "rf_text=$" . count($param) . ",rf_md5=md5($" .
239        count($param) . ")";
240      $log .= ", updated text";
241    }
242    if (!empty($row['url']) && $row['url'] != $oldLicense['rf_url']) {
243      $param[] = $row['url'];
244      $stmt .= '.url';
245      $extraParams[] = "rf_url=$" . count($param);
246      $log .= ", updated URL";
247    }
248    if (!empty($row['notes']) && $row['notes'] != $oldLicense['rf_notes']) {
249      $param[] = $row['notes'];
250      $stmt .= '.notes';
251      $extraParams[] = "rf_notes=$" . count($param);
252      $log .= ", updated notes";
253    }
254    if (!empty($row['source']) && $row['source'] != $oldLicense['rf_source']) {
255      $param[] = $row['source'];
256      $stmt .= '.updSource';
257      $extraParams[] = "rf_source=$".count($param);
258      $log .= ', updated the source';
259    }
260    if (!empty($row['risk']) && $row['risk'] != $oldLicense['rf_risk']) {
261      $param[] = $row['risk'];
262      $stmt .= '.updRisk';
263      $extraParams[] = "rf_risk=$".count($param);
264      $log .= ', updated the risk level';
265    }
266    if (count($param) > 1) {
267      $sql .= join(",", $extraParams);
268      $sql .= " WHERE rf_pk=$1;";
269      $this->dbManager->getSingleRow($sql, $param, $stmt);
270      $this->mdkMap[md5($row['text'])] = $rfPk;
271    }
272
273    if (($oldParent != $newParent) && $this->setMap($newParent, $rfPk, LicenseMap::CONCLUSION)) {
274      $log .= " with conclusion '$row[parent_shortname]'";
275    }
276    if (($oldReport != $newReport) && $this->setMap($newReport, $rfPk, LicenseMap::REPORT)) {
277      $log .= " reporting '$row[report_shortname]'";
278    }
279    return $log;
280  }
281
282  /**
283   * @brief Handle a single row from CSV.
284   *
285   * The function checks if the license text hash is already in the DB, then
286   * updates it. Otherwise inserts new row in the DB.
287   * @param array $row CSV row to be inserted.
288   * @return string Log messages.
289   */
290  private function handleCsvLicense($row)
291  {
292    if (empty($row['risk'])) {
293      $row['risk'] = 0;
294    }
295    $rfPk = $this->getKeyFromShortname($row['shortname'], $row['group']);
296    $md5Match = $this->getKeyFromMd5($row['text']);
297
298    // If shortname exists, does not collide with other texts and is not
299    // candidate
300    if ($rfPk !== false) {
301      if (! empty($row['group']) || ($md5Match == $rfPk || $md5Match === false)) {
302        return $this->updateLicense($row, $rfPk);
303      } else {
304        return "Error: MD5 checksum of '" . $row['shortname'] .
305          "' collides with license id=$md5Match";
306      }
307    }
308    if ($md5Match !== false && empty($row['group'])) {
309      return "Error: MD5 checksum of '" . $row['shortname'] .
310        "' collides with license id=$md5Match";
311    }
312
313    $return = "";
314    if (!empty($row['group'])) {
315      $return = $this->insertNewLicense($row, "license_candidate");
316    } else {
317      $return = $this->insertNewLicense($row, "license_ref");
318    }
319    return $return;
320  }
321
322  /**
323   * @brief Insert in `license_map` table if the license conclusion is
324   * non-trivial.
325   *
326   * If the from and to are not same and from exists in database, then the
327   * conclusion is non-trivial.
328   * @param string $fromName  Parent license name
329   * @param string $toName    License name
330   * @param string $usage     Usage of the license
331   * @return boolean True if license is non-trivial, false otherwise.
332   */
333  private function insertMapIfNontrivial($fromName,$toName,$usage)
334  {
335    $isNontrivial = ($fromName!==null && $fromName!=$toName && $this->getKeyFromShortname($fromName)!==false);
336    if ($isNontrivial) {
337      $this->dbManager->insertTableRow('license_map',
338        array('rf_fk'=>$this->getKeyFromShortname($toName),
339            'rf_parent'=>$this->getKeyFromShortname($fromName),
340            'usage'=> $usage));
341    }
342    return $isNontrivial;
343  }
344
345  /**
346   * @brief Get the license id using license shortname from DB or nkMap.
347   * @param string $shortname Shortname of the license.
348   * @return int License id
349   */
350  private function getKeyFromShortname($shortname, $groupFk = null)
351  {
352    $keyName = $shortname;
353    $tableName = "license_ref";
354    $addCondition = "";
355    $statement = __METHOD__ . ".getId";
356    $params = array($shortname);
357
358    if ($groupFk != null) {
359      $keyName .= $groupFk;
360      $tableName = "license_candidate";
361      $addCondition = "AND group_fk = $2";
362      $statement .= ".candidate";
363      $params[] = $this->userDao->getGroupIdByName($groupFk);
364    }
365    $sql = "SELECT rf_pk FROM ONLY $tableName WHERE rf_shortname = $1 $addCondition;";
366    if (array_key_exists($keyName, $this->nkMap)) {
367      return $this->nkMap[$keyName];
368    }
369    $row = $this->dbManager->getSingleRow($sql, $params, $statement);
370    $this->nkMap[$keyName] = ($row===false) ? false : $row['rf_pk'];
371    return $this->nkMap[$keyName];
372  }
373
374  /**
375   * Get the license id using license text's checksum from DB or mdkMap.
376   * @param string $licenseText License text
377   * @return integer License id
378   */
379  private function getKeyFromMd5($licenseText)
380  {
381    $md5 = md5($licenseText);
382    if (array_key_exists($md5, $this->mdkMap)) {
383      return $this->mdkMap[$md5];
384    }
385    $row = $this->dbManager->getSingleRow("SELECT rf_pk " .
386      "FROM ONLY license_ref WHERE rf_md5=md5($1)",
387      array($licenseText));
388    $this->mdkMap[$md5] = (empty($row)) ? false : $row['rf_pk'];
389    return $this->mdkMap[$md5];
390  }
391
392  /**
393   * @brief Update license mappings
394   *
395   * First check if the mapping already exists for the license, then update it.
396   * If the mapping does not exists, then insert it.
397   * @param integer $from  The new mapping license
398   * @param integer $to    The license to be updated
399   * @param integer $usage The usage
400   * @return boolean False if mapping could not be updated or $from is empty.
401   */
402  private function setMap($from, $to, $usage)
403  {
404    $return = false;
405    if (!empty($from)) {
406      $sql = "SELECT license_map_pk, rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;";
407      $statement = __METHOD__ . ".getCurrentMapping";
408      $row = $this->dbManager->getSingleRow($sql, array($to, $usage), $statement);
409      if (!empty($row) && $row['rf_parent'] != $from) {
410        $this->dbManager->updateTableRow("license_map", array(
411          'rf_fk' => $to,
412          'rf_parent' => $from,
413          'usage' => $usage
414        ), 'license_map_pk', $row['license_map_pk']);
415        $return = true;
416      } elseif (empty($row)) {
417        $this->dbManager->insertTableRow('license_map', array(
418          'rf_fk' => $to,
419          'rf_parent' => $from,
420          'usage' => $usage
421        ));
422        $return = true;
423      }
424    }
425    return $return;
426  }
427
428  /**
429   * @brief Insert a new license in DB
430   *
431   * Creates a new main license/candidate license based on table name sent
432   * and if the required group exists in DB.
433   * @param array $row        Rows comming from CSV
434   * @param string $tableName Table where this new license should go to
435   * @return string Log messages
436   */
437  private function insertNewLicense($row, $tableName = "license_ref")
438  {
439    $stmtInsert = __METHOD__ . '.insert.' . $tableName;
440    $columns = array(
441      "rf_shortname" => $row['shortname'],
442      "rf_fullname"  => $row['fullname'],
443      "rf_text"      => $row['text'],
444      "rf_md5"       => md5($row['text']),
445      "rf_detector_type" => 1,
446      "rf_url"       => $row['url'],
447      "rf_notes"     => $row['notes'],
448      "rf_source"    => $row['source'],
449      "rf_risk"      => $row['risk']
450    );
451
452    $as = "";
453    if ($tableName == "license_candidate") {
454      $groupId = $this->userDao->getGroupIdByName($row['group']);
455      if (empty($groupId)) {
456        return "Error: Unable to insert candidate license " . $row['shortname'] .
457          " as group " . $row['group'] . " does not exist";
458      }
459      $columns["group_fk"] = $groupId;
460      $columns["marydone"] = $this->dbManager->booleanToDb(true);
461      $as = " as candidate license under group " . $row["group"];
462    }
463
464    $newPk = $this->dbManager->insertTableRow($tableName, $columns, $stmtInsert, 'rf_pk');
465
466    if ($tableName == "license_candidate") {
467      $this->nkMap[$row['shortname'].$row['group']] = $newPk;
468    } else {
469      $this->nkMap[$row['shortname']] = $newPk;
470    }
471    $this->mdkMap[md5($row['text'])] = $newPk;
472    $return = "Inserted '$row[shortname]' in DB" . $as;
473
474    if ($this->insertMapIfNontrivial($row['parent_shortname'], $row['shortname'], LicenseMap::CONCLUSION)) {
475      $return .= " with conclusion '$row[parent_shortname]'";
476    }
477    if ($this->insertMapIfNontrivial($row['report_shortname'], $row['shortname'], LicenseMap::REPORT)) {
478      $return .= " reporting '$row[report_shortname]'";
479    }
480    return $return;
481  }
482}
483