1 /*
2 Author: Daniele Fognini, Andreas Wuerl, Johannes Najjar
3 Copyright (C) 2014, Siemens AG
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18 /**
19  * \file copyright.cc
20  * \brief Copyright agent
21  * \page copyright Copyright Agent
22  * \tableofcontents
23  *
24  * Copyright agent uses regular expressions to find copyright
25  * statements, author statements, URLs and Emails in uploads.
26  *
27  * Copyright agent also creates the ecc agent, which likewise uses
28  * regular expressions to find ecc statements in uploads.
29  *
30  * The agent runs in multi-threaded mode and creates a new thread for
31  * every pfile for faster processing.
32  *
33  * \section copyrightactions Supported actions
34  * | Command line flag | Description |
35  * | ---: | :--- |
36  * | -h [--help] | Shows help |
37  * | -T [--type] arg (=15) | Type of regex to try |
38  * | -v [--verbose] | Increase verbosity |
39  * | --regex arg | User defined Regex to search: |
40  * || `[{name=cli}@@][{matchingGroup=0}@@]{regex}` |
41  * || e.g. 'linux@@1@@(linus) torvalds' |
42  * | --files arg | Files to scan |
43  * | -J [--json] | Output JSON |
44  * | -d [--directory] | Directory to scan (recursive) |
45  * \section copyrightsource Agent source
46  *   - \link src/copyright/agent \endlink
47  *   - \link src/copyright/ui \endlink
48  *   - Functional test cases \link src/copyright/agent_tests/Functional \endlink
49  *   - Unit test cases \link src/copyright/agent_tests/Unit \endlink
50  */
51 #include <stdio.h>
52 #include <iostream>
53 #include <sstream>
54 
55 #include "copyright.hpp"
56 
57 using namespace std;
58 using namespace fo;
59 
/**
 * \def return_sched(retval)
 * \brief Disconnect from the scheduler and return from the enclosing function.
 *
 * Passes \a retval to fo_scheduler_disconnect() and then returns the very same
 * value from the function in which the macro is expanded.
 *
 * The argument is evaluated exactly once, so an expression with side effects
 * yields the same value for the disconnect call and for the return statement.
 * The do/while(0) wrapper makes the expansion behave like a single statement.
 */
#define return_sched(retval) \
  do {\
    const auto return_sched_retval_ = (retval);\
    fo_scheduler_disconnect(return_sched_retval_);\
    return return_sched_retval_;\
  } while(0)
65 
main(int argc,char ** argv)66 int main(int argc, char** argv)
67 {
68   /* before parsing argv and argc make sure */
69   /* to initialize the scheduler connection */
70 
71   CliOptions cliOptions;
72   vector<string> fileNames;
73   string directoryToScan;
74   if (!parseCliOptions(argc, argv, cliOptions, fileNames, directoryToScan))
75   {
76     return_sched(1);
77   }
78 
79   bool json = cliOptions.doJsonOutput();
80   bool ignoreFilesWithMimeType = cliOptions.doignoreFilesWithMimeType();
81   CopyrightState state = getState(std::move(cliOptions));
82 
83   if (!fileNames.empty())
84   {
85     const unsigned long fileNamesCount = fileNames.size();
86     bool fileError = false;
87     bool printComma = false;
88 
89     if (json)
90     {
91       cout << "[" << endl;
92     }
93 
94 #pragma omp parallel
95     {
96 #pragma omp for
97       for (unsigned int argn = 0; argn < fileNamesCount; ++argn)
98       {
99         const string fileName = fileNames[argn];
100         pair<string, list<match>> scanResult = processSingleFile(state, fileName);
101         if (json)
102         {
103           appendToJson(fileName, scanResult, printComma);
104         }
105         else
106         {
107           printResultToStdout(fileName, scanResult);
108         }
109         if (scanResult.first.empty())
110         {
111           fileError = true;
112         }
113       }
114     }
115     if (json)
116     {
117       cout << endl << "]" << endl;
118     }
119     return fileError ? 1 : 0;
120   }
121   else if (directoryToScan.length() > 0)
122   {
123     scanDirectory(state, json, directoryToScan);
124   }
125   else
126   {
127     DbManager dbManager(&argc, argv);
128     int agentId = queryAgentId(dbManager.getConnection());
129 
130     CopyrightDatabaseHandler copyrightDatabaseHandler(dbManager);
131     if (!copyrightDatabaseHandler.createTables())
132     {
133       std::cout << "FATAL: initialization failed" << std::endl;
134       return_sched(9);
135     }
136 
137     while (fo_scheduler_next() != NULL)
138     {
139       int uploadId = atoi(fo_scheduler_current());
140 
141       if (uploadId <= 0) continue;
142 
143       int arsId = writeARS(agentId, 0, uploadId, 0, dbManager);
144 
145       if (arsId <= 0)
146         return_sched(5);
147 
148       if (!processUploadId(state, agentId, uploadId, copyrightDatabaseHandler, ignoreFilesWithMimeType))
149         return_sched(2);
150 
151       fo_scheduler_heart(0);
152       writeARS(agentId, arsId, uploadId, 1, dbManager);
153     }
154     fo_scheduler_heart(0);
155     /* do not use bail, as it would prevent the destructors from running */
156     return_sched(0);
157   }
158 }
159 
160