1 /*
2 Author: Daniele Fognini, Andreas Wuerl, Johannes Najjar
3 Copyright (C) 2014, Siemens AG
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18 /**
19 * \file copyright.cc
20 * \brief Copyright agent
21 * \page copyright Copyright Agent
22 * \tableofcontents
23 *
 * The copyright agent uses regular expressions to find copyright
 * statements, author statements, URLs and e-mail addresses in uploads.
26 *
 * The copyright agent also creates the ecc agent, which likewise uses
 * regular expressions to find ecc statements in uploads.
29 *
30 * The agent runs in multi-threaded mode and creates a new thread for
31 * every pfile for faster processing.
32 *
33 * \section copyrightactions Supported actions
34 * | Command line flag | Description |
35 * | ---: | :--- |
36 * | -h [--help] | Shows help |
37 * | -T [--type] arg (=15) | Type of regex to try |
38 * | -v [--verbose] | Increase verbosity |
39 * | --regex arg | User defined Regex to search: |
40 * || `[{name=cli}@@][{matchingGroup=0}@@]{regex}` |
41 * || e.g. 'linux@@1@@(linus) torvalds' |
42 * | --files arg | Files to scan |
43 * | -J [--json] | Output JSON |
44 * | -d [--directory] | Directory to scan (recursive) |
45 * \section copyrightsource Agent source
46 * - \link src/copyright/agent \endlink
47 * - \link src/copyright/ui \endlink
48 * - Functional test cases \link src/copyright/agent_tests/Functional \endlink
49 * - Unit test cases \link src/copyright/agent_tests/Unit \endlink
50 */
51 #include <stdio.h>
52 #include <iostream>
53 #include <sstream>
54
55 #include "copyright.hpp"
56
57 using namespace std;
58 using namespace fo;
59
/**
 * \def return_sched(retval)
 * \brief Disconnect from the scheduler and return from the enclosing function.
 *
 * Passes \a retval to fo_scheduler_disconnect() and then returns the same
 * value from the function in which the macro is expanded.
 *
 * \a retval is evaluated exactly once, so an argument with side effects
 * (e.g. a function call) is safe to pass. The do/while(0) wrapper makes the
 * expansion behave as a single statement, e.g. inside an unbraced `if`.
 *
 * \param retval value handed to the scheduler and returned to the caller
 */
#define return_sched(retval) \
  do {\
    const int return_sched_value_ = (retval);\
    fo_scheduler_disconnect(return_sched_value_);\
    return return_sched_value_;\
  } while(0)
65
main(int argc,char ** argv)66 int main(int argc, char** argv)
67 {
68 /* before parsing argv and argc make sure */
69 /* to initialize the scheduler connection */
70
71 CliOptions cliOptions;
72 vector<string> fileNames;
73 string directoryToScan;
74 if (!parseCliOptions(argc, argv, cliOptions, fileNames, directoryToScan))
75 {
76 return_sched(1);
77 }
78
79 bool json = cliOptions.doJsonOutput();
80 bool ignoreFilesWithMimeType = cliOptions.doignoreFilesWithMimeType();
81 CopyrightState state = getState(std::move(cliOptions));
82
83 if (!fileNames.empty())
84 {
85 const unsigned long fileNamesCount = fileNames.size();
86 bool fileError = false;
87 bool printComma = false;
88
89 if (json)
90 {
91 cout << "[" << endl;
92 }
93
94 #pragma omp parallel
95 {
96 #pragma omp for
97 for (unsigned int argn = 0; argn < fileNamesCount; ++argn)
98 {
99 const string fileName = fileNames[argn];
100 pair<string, list<match>> scanResult = processSingleFile(state, fileName);
101 if (json)
102 {
103 appendToJson(fileName, scanResult, printComma);
104 }
105 else
106 {
107 printResultToStdout(fileName, scanResult);
108 }
109 if (scanResult.first.empty())
110 {
111 fileError = true;
112 }
113 }
114 }
115 if (json)
116 {
117 cout << endl << "]" << endl;
118 }
119 return fileError ? 1 : 0;
120 }
121 else if (directoryToScan.length() > 0)
122 {
123 scanDirectory(state, json, directoryToScan);
124 }
125 else
126 {
127 DbManager dbManager(&argc, argv);
128 int agentId = queryAgentId(dbManager.getConnection());
129
130 CopyrightDatabaseHandler copyrightDatabaseHandler(dbManager);
131 if (!copyrightDatabaseHandler.createTables())
132 {
133 std::cout << "FATAL: initialization failed" << std::endl;
134 return_sched(9);
135 }
136
137 while (fo_scheduler_next() != NULL)
138 {
139 int uploadId = atoi(fo_scheduler_current());
140
141 if (uploadId <= 0) continue;
142
143 int arsId = writeARS(agentId, 0, uploadId, 0, dbManager);
144
145 if (arsId <= 0)
146 return_sched(5);
147
148 if (!processUploadId(state, agentId, uploadId, copyrightDatabaseHandler, ignoreFilesWithMimeType))
149 return_sched(2);
150
151 fo_scheduler_heart(0);
152 writeARS(agentId, arsId, uploadId, 1, dbManager);
153 }
154 fo_scheduler_heart(0);
155 /* do not use bail, as it would prevent the destructors from running */
156 return_sched(0);
157 }
158 }
159
160