1 /*
2  * Copyright (C) 2014-2017, Siemens AG
3  * Author: Daniele Fognini, Johannes Najjar
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  * See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 #include "database.hpp"
20 #include "identity.hpp"
21 
22 #include <iostream>
23 #include <libfossUtils.hpp>
24 
25 using namespace fo;
26 
/**
 * \brief Leave the enclosing function with `false` when \a query is falsy
 *
 * The argument is evaluated exactly once. The do/while(0) wrapper makes the
 * macro behave like a single statement, so it is safe in unbraced if/else.
 */
#define RETURN_IF_FALSE(query) \
  do { \
    if (!(query)) \
      return false; \
  } while (0)
33 
34 /**
35  * \brief Default constructor for DatabaseEntry
36  */
DatabaseEntry()37 DatabaseEntry::DatabaseEntry() :
38         agent_fk(0),
39         pfile_fk(0),
40         content(""),
41         hash(""),
42         type(""),
43         copy_startbyte(0),
44         copy_endbyte(0)
45 {
46 };
47 
48 /**
49  * \brief Spawn/fork a new database handler and return it
50  * \return CopyrightDatabaseHandler object with spawned DbManager
51  */
spawn() const52 CopyrightDatabaseHandler CopyrightDatabaseHandler::spawn() const
53 {
54   DbManager spawnedDbMan(dbManager.spawn());
55   return CopyrightDatabaseHandler(spawnedDbMan);
56 }
57 
58 /**
59  * \brief Given a list of ColumnDef, return a comma separated list of column names
60  * \param in   List to parse
61  * \param size Number of elements in the list
62  * \return Comma separated list of column names
63  * \see CopyrightDatabaseHandler::ColumnDef
64  */
getColumnListString(const CopyrightDatabaseHandler::ColumnDef in[],size_t size) const65 std::string CopyrightDatabaseHandler::getColumnListString(const CopyrightDatabaseHandler::ColumnDef in[], size_t size) const
66 {
67   std::string result;
68   for (size_t i = 0; i < size; ++i)
69   {
70     if (i != 0)
71       result += ", ";
72     result += in[i].name;
73   }
74   return result;
75 }
76 
77 /**
78  * \brief Return a comma delimited string with column elements separated by space.
79  * The string is used for database creation
80  * \param in   List of column to be parsed
81  * \param size Number of elements in the list
82  * \return Comma delimited string
83  * \see CopyrightDatabaseHandler::createTableAgentFindings()
84  */
getColumnCreationString(const CopyrightDatabaseHandler::ColumnDef in[],size_t size) const85 std::string CopyrightDatabaseHandler::getColumnCreationString(const CopyrightDatabaseHandler::ColumnDef in[], size_t size) const
86 {
87   std::string result;
88   for (size_t i = 0; i < size; ++i)
89   {
90     if (i != 0)
91       result += ", ";
92     result += in[i].name;
93     result += " ";
94     result += in[i].type;
95     result += " ";
96     result += in[i].creationFlags;
97   }
98   return result;
99 }
100 
101 /**
102  * \brief Create tables required by agent
103  *
104  * Calls createTableAgentFindings() and createTableClearing()
105  * to create the tables required by the agent to work.
106  *
107  * The function tries to create table in maximum of MAX_TABLE_CREATION_RETRIES
108  * attempts.
109  * \return True if success, false otherwise
110  */
createTables() const111 bool CopyrightDatabaseHandler::createTables() const
112 {
113   int failedCounter = 0;
114   bool tablesChecked = false;
115 
116   dbManager.ignoreWarnings(true);
117   while (!tablesChecked && failedCounter < MAX_TABLE_CREATION_RETRIES)
118   {
119     dbManager.begin();
120 
121     tablesChecked = createTableAgentFindings() && createTableClearing();
122 
123     if (tablesChecked)
124       dbManager.commit();
125     else
126     {
127       dbManager.rollback();
128       ++failedCounter;
129       if (failedCounter < MAX_TABLE_CREATION_RETRIES)
130         std::cout << "WARNING: table creation failed: trying again"
131           " (" << failedCounter << "/" << MAX_TABLE_CREATION_RETRIES << ")"
132           << std::endl;
133     }
134   }
135   if (tablesChecked && (failedCounter > 0))
136     std::cout << "NOTICE: table creation succeded on try "
137       << failedCounter << "/" << MAX_TABLE_CREATION_RETRIES
138       << std::endl;
139 
140   dbManager.ignoreWarnings(false);
141   return tablesChecked;
142 }
143 
144 /**
145  * \brief Columns required by agent in database
146  * \todo Removed constrain: "CHECK (type in ('statement', 'email', 'url'))"}
147  */
148 const CopyrightDatabaseHandler::ColumnDef CopyrightDatabaseHandler::columns[] =
149   {
150 #define SEQUENCE_NAME IDENTITY"_pk_seq"
151 #define COLUMN_NAME_PK IDENTITY"_pk"
152     { COLUMN_NAME_PK, "bigint", "PRIMARY KEY DEFAULT nextval('" SEQUENCE_NAME "'::regclass)"},
153     {"agent_fk", "bigint", "NOT NULL"},
154     {"pfile_fk", "bigint", "NOT NULL"},
155     {"content", "text", ""},
156     {"hash", "text", ""},
157     {"type", "text", ""}, //TODO removed constrain: "CHECK (type in ('statement', 'email', 'url'))"},
158     {"copy_startbyte", "integer", ""},
159     {"copy_endbyte", "integer", ""},
160     {"is_enabled", "boolean", "NOT NULL DEFAULT TRUE"},
161   };
162 
163 /**
164  * \brief Create table to store agent find data
165  * \return True on success, false otherwise
166  * \see CopyrightDatabaseHandler::columns
167  */
createTableAgentFindings() const168 bool CopyrightDatabaseHandler::createTableAgentFindings() const
169 {
170   if (!dbManager.sequenceExists(SEQUENCE_NAME))
171   {
172     RETURN_IF_FALSE(dbManager.queryPrintf("CREATE SEQUENCE "
173       SEQUENCE_NAME
174       " START WITH 1"
175         " INCREMENT BY 1"
176         " NO MAXVALUE"
177         " NO MINVALUE"
178         " CACHE 1"));
179   }
180 
181   if (!dbManager.tableExists(IDENTITY))
182   {
183     size_t ncolumns = (sizeof(CopyrightDatabaseHandler::columns) / sizeof(CopyrightDatabaseHandler::ColumnDef));
184     RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", IDENTITY,
185       getColumnCreationString(CopyrightDatabaseHandler::columns, ncolumns).c_str()
186     )
187     );
188     RETURN_IF_FALSE(dbManager.queryPrintf(
189       "CREATE INDEX %s_agent_fk_index"
190         " ON %s"
191         " USING BTREE (agent_fk)",
192       IDENTITY, IDENTITY
193     ));
194 
195     RETURN_IF_FALSE(dbManager.queryPrintf(
196       "CREATE INDEX %s_hash_index"
197         " ON %s"
198         " USING BTREE (hash)",
199       IDENTITY, IDENTITY
200     ));
201 
202     RETURN_IF_FALSE(dbManager.queryPrintf(
203       "CREATE INDEX %s_pfile_fk_index"
204         " ON %s"
205         " USING BTREE (pfile_fk)",
206       IDENTITY, IDENTITY
207     ));
208 
209     RETURN_IF_FALSE(dbManager.queryPrintf(
210       "ALTER TABLE ONLY %s"
211         " ADD CONSTRAINT agent_fk"
212         " FOREIGN KEY (agent_fk)"
213         " REFERENCES agent(agent_pk) ON DELETE CASCADE",
214       IDENTITY
215     ));
216 
217     RETURN_IF_FALSE(dbManager.queryPrintf(
218       "ALTER TABLE ONLY %s"
219         " ADD CONSTRAINT pfile_fk"
220         " FOREIGN KEY (pfile_fk)"
221         " REFERENCES pfile(pfile_pk) ON DELETE CASCADE",
222       IDENTITY
223     ));
224   }
225   return true;
226 }
227 
228 /**
229  * \brief Columns required to store user decisions in database.
230  */
231 const CopyrightDatabaseHandler::ColumnDef CopyrightDatabaseHandler::columnsDecision[] = {
232 #define SEQUENCE_NAMEClearing IDENTITY"_decision_pk_seq"
233   {IDENTITY"_decision_pk", "bigint", "PRIMARY KEY DEFAULT nextval('" SEQUENCE_NAMEClearing "'::regclass)"},
234   {"user_fk", "bigint", "NOT NULL"},
235   {"pfile_fk", "bigint", "NOT NULL"},
236   {"clearing_decision_type_fk", "bigint", "NOT NULL"},
237   {"description", "text", ""},
238   {"textFinding", "text", ""},
239   {"comment", "text", ""},
240   {"is_enabled", "boolean", "NOT NULL DEFAULT TRUE"}
241 };
242 
243 /**
244  * \brief Create table to store user decisions
245  * \return True on success, false otherwise
246  * \see CopyrightDatabaseHandler::columnsDecision
247  */
createTableClearing() const248 bool CopyrightDatabaseHandler::createTableClearing() const
249 {
250   #define CLEARING_TABLE IDENTITY "_decision"
251 
252   if (!dbManager.sequenceExists(SEQUENCE_NAMEClearing))
253   {
254     RETURN_IF_FALSE(dbManager.queryPrintf("CREATE SEQUENCE "
255       SEQUENCE_NAMEClearing
256       " START WITH 1"
257         " INCREMENT BY 1"
258         " NO MAXVALUE"
259         " NO MINVALUE"
260         " CACHE 1"));
261   }
262 
263   if (!dbManager.tableExists(CLEARING_TABLE))
264   {
265     size_t nDec = (sizeof(CopyrightDatabaseHandler::columnsDecision) / sizeof(CopyrightDatabaseHandler::ColumnDef));
266     RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", CLEARING_TABLE,
267       getColumnCreationString(CopyrightDatabaseHandler::columnsDecision, nDec).c_str()));
268 
269     RETURN_IF_FALSE(dbManager.queryPrintf(
270       "CREATE INDEX %s_pfile_fk_index"
271         " ON %s"
272         " USING BTREE (pfile_fk)",
273       CLEARING_TABLE, CLEARING_TABLE
274     ));
275 
276     RETURN_IF_FALSE(dbManager.queryPrintf(
277       "CREATE INDEX %s_user_fk_index"
278         " ON %s"
279         " USING BTREE (user_fk)",
280       CLEARING_TABLE, CLEARING_TABLE
281     ));
282 
283     RETURN_IF_FALSE(dbManager.queryPrintf(
284       "CREATE INDEX %s_clearing_decision_type_fk_index"
285         " ON %s"
286         " USING BTREE (clearing_decision_type_fk)",
287       CLEARING_TABLE, CLEARING_TABLE
288     ));
289 
290     RETURN_IF_FALSE(dbManager.queryPrintf(
291       "ALTER TABLE ONLY %s"
292         " ADD CONSTRAINT user_fk"
293         " FOREIGN KEY (user_fk)"
294         " REFERENCES  users(user_pk) ON DELETE CASCADE",
295       CLEARING_TABLE
296     ));
297 
298     RETURN_IF_FALSE(dbManager.queryPrintf(
299       "ALTER TABLE ONLY %s"
300         " ADD CONSTRAINT pfile_fk"
301         " FOREIGN KEY (pfile_fk)"
302         " REFERENCES pfile(pfile_pk) ON DELETE CASCADE",
303       CLEARING_TABLE
304     ));
305   }
306 
307   return true;
308 }
309 
310 /**
311  * \brief Get the list of pfile ids on which the given agent has no findings for a given upload
312  * \param agentId  Agent id to be removed from result
313  * \param uploadId Upload id to scan for files
314  * \param ignoreFilesWithMimeType to exclude filetypes with particular mimetype
315  * \return List of pfiles on which the given agent has no findings
316  */
queryFileIdsForUpload(int agentId,int uploadId,bool ignoreFilesWithMimeType)317 std::vector<unsigned long> CopyrightDatabaseHandler::queryFileIdsForUpload(int agentId, int uploadId, bool ignoreFilesWithMimeType)
318 {
319   std::string uploadTreeTableName = queryUploadTreeTableName(uploadId);
320   fo_dbManager_PreparedStatement* preparedStatement;
321   std::string sql = "SELECT pfile_pk"
322     " FROM ("
323     "  SELECT distinct(pfile_fk) AS PF"
324     "  FROM " + uploadTreeTableName +
325     "  WHERE upload_fk = $1 and (ufile_mode&x'3C000000'::int)=0"
326     " ) AS SS "
327     "LEFT OUTER JOIN " IDENTITY " ON (PF = pfile_fk AND agent_fk = $2) "
328 #ifdef IDENTITY_COPYRIGHT
329     "LEFT OUTER JOIN author AS au ON (PF = au.pfile_fk AND au.agent_fk = $2) "
330 #endif
331     "INNER JOIN pfile ON (PF = pfile_pk) "
332 #ifdef IDENTITY_COPYRIGHT
333     "WHERE copyright.copyright_pk IS NULL AND au.author_pk IS NULL"
334 #else
335     "WHERE (" IDENTITY "_pk IS NULL OR agent_fk <> $2)"
336 #endif
337     ;
338   std::string statementName = "queryFileIdsForUpload:" IDENTITY "Agent" + uploadTreeTableName;
339   if (ignoreFilesWithMimeType)
340   {
341     sql = sql + " AND (pfile_mimetypefk NOT IN ( "
342       "SELECT mimetype_pk FROM mimetype WHERE mimetype_name=ANY(string_to_array(( "
343       "SELECT conf_value FROM sysconfig WHERE variablename='SkipFiles'),','))));";
344     statementName = statementName + "withMimetype";
345   }
346   preparedStatement =
347     fo_dbManager_PrepareStamement(dbManager.getStruct_dbManager(),
348       statementName.c_str(),
349       sql.c_str(),
350       int, int);
351   QueryResult queryResult = dbManager.execPrepared(preparedStatement,
352       uploadId, agentId);
353 
354   return queryResult.getSimpleResults<unsigned long>(0, fo::stringToUnsignedLong);
355 
356 }
357 
358 /**
359  * \brief Insert empty findings in database to prevent scan on next upload
360  * \param agentId Id of agent which did not find any statement
361  * \param pFileId Id of the file on which no statements were found
362  * \return True on success, false otherwise
363  */
insertNoResultInDatabase(long int agentId,long int pFileId) const364 bool CopyrightDatabaseHandler::insertNoResultInDatabase(long int agentId, long int pFileId) const
365 {
366   return dbManager.execPrepared(
367     fo_dbManager_PrepareStamement(
368       dbManager.getStruct_dbManager(),
369       "insertNoResultInDatabase",
370       "INSERT INTO "
371       IDENTITY
372       "(agent_fk, pfile_fk) VALUES($1,$2)",
373       long, long
374     ),
375     agentId, pFileId
376   );
377 }
378 
379 /**
380  * \brief Insert a finding in database
381  * \param entry Entry to be inserted in the database
382  * \return True on success, false otherwise
383  * \see DatabaseEntry
384  */
insertInDatabase(DatabaseEntry & entry) const385 bool CopyrightDatabaseHandler::insertInDatabase(DatabaseEntry& entry) const
386 {
387   std::string tableName = IDENTITY;
388 
389   if("author" == entry.type ||
390      "email" == entry.type ||
391      "url" == entry.type){
392     tableName = "author";
393   }
394 
395   return dbManager.execPrepared(
396     fo_dbManager_PrepareStamement(
397       dbManager.getStruct_dbManager(),
398       ("insertInDatabaseFor" + tableName).c_str(),
399       ("INSERT INTO "+ tableName +
400       "(agent_fk, pfile_fk, content, hash, type, copy_startbyte, copy_endbyte)" +
401         " VALUES($1,$2,$3,md5($3),$4,$5,$6)").c_str(),
402         long, long, char*, char*, int, int
403     ),
404     entry.agent_fk, entry.pfile_fk,
405     entry.content.c_str(),
406     entry.type.c_str(),
407     entry.copy_startbyte, entry.copy_endbyte
408   );
409 }
410 
411 /**
412  * \brief Constructor to initialize database handler
413  */
CopyrightDatabaseHandler(DbManager manager)414 CopyrightDatabaseHandler::CopyrightDatabaseHandler(DbManager manager) :
415   AgentDatabaseHandler(manager)
416 {
417 
418 }
419