1 /*
2 Author: Daniele Fognini, Andreas Wuerl
3 Copyright (C) 2013-2015, Siemens AG
4 
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 version 2 as published by the Free Software Foundation.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 */
18 
19 #include <glib.h>
20 
21 #include "license.h"
22 #include "string_operations.h"
23 #include "monk.h"
24 
/* Short names that isIgnoredLicense() rejects by exact string comparison. */
static char* ignoredLicenseNames[] = {
  "Void",
  "No_license_found"
};
/* Texts that isIgnoredLicense() rejects when a license tokenizes to the same tokens. */
static char* ignoredLicenseTexts[] = {
  "License by Nomos.",
  "License by Ninka."
};
27 
isIgnoredLicense(const License * license)28 int isIgnoredLicense(const License* license) {
29 
30   int ignoredLicenseNamesCount = sizeof(ignoredLicenseNames)/sizeof(char*);
31   for (int i = 0; i < ignoredLicenseNamesCount; i++) {
32     if (strcmp(license->shortname, ignoredLicenseNames[i]) == 0)
33       return 1;
34   }
35 
36   int ignoredLicenseTextsCount = sizeof(ignoredLicenseTexts)/sizeof(char*);
37   for (int i = 0; i < ignoredLicenseTextsCount; i++) {
38     GArray* ignoredTokens = tokenize(ignoredLicenseTexts[i], DELIMITERS);
39     if (tokensEquals(license->tokens, ignoredTokens)) {
40       tokens_free(ignoredTokens);
41       return 1;
42     }
43     tokens_free(ignoredTokens);
44   }
45 
46   return 0;
47 }
48 
extractLicenses(fo_dbManager * dbManager,PGresult * licensesResult,unsigned minAdjacentMatches,unsigned maxLeadingDiff)49 Licenses* extractLicenses(fo_dbManager* dbManager, PGresult* licensesResult, unsigned minAdjacentMatches, unsigned maxLeadingDiff) {
50   GArray* licenses = g_array_new(TRUE, FALSE, sizeof (License));
51 
52   for (int j = 0; j < PQntuples(licensesResult); j++) {
53     long refId = atol(PQgetvalue(licensesResult, j, 0));
54     char* licShortName = PQgetvalue(licensesResult, j, 1);
55 
56     License license;
57     license.refId = refId;
58     license.shortname = g_strdup(licShortName);
59 
60     char* licenseText = getLicenseTextForLicenseRefId(dbManager, refId);
61     GArray* licenseTokens = tokenize(licenseText, DELIMITERS);
62 
63     free(licenseText);
64     license.tokens = licenseTokens;
65 
66     if (!isIgnoredLicense(&license))
67       g_array_append_val(licenses, license);
68     else {
69       tokens_free(license.tokens);
70       g_free(license.shortname);
71     }
72   }
73 
74   return buildLicenseIndexes(licenses, minAdjacentMatches, maxLeadingDiff);
75 }
76 
/**
 * Release a Licenses structure together with everything it holds.
 *
 * Tokens and short names are freed only through the main license array.
 * The short-license array and the per-position hash tables are released
 * without touching the tokens or short names of their elements.
 *
 * @param licenses structure to release; NULL is accepted and ignored
 */
void licenses_free(Licenses* licenses) {
  if (!licenses)
    return;

  GArray* ownedLicenses = licenses->licenses;
  guint pos = 0;
  while (pos < ownedLicenses->len) {
    License* entry = license_index(ownedLicenses, pos);
    tokens_free(entry->tokens);
    /* g_free() ignores NULL, so no separate check is needed */
    g_free(entry->shortname);
    pos++;
  }
  g_array_free(ownedLicenses, TRUE);

  g_array_free(licenses->shortLicenses, TRUE);

  GArray* tables = licenses->indexes;
  for (guint t = 0; t < tables->len; t++) {
    g_hash_table_unref(g_array_index(tables, GHashTable*, t));
  }
  g_array_free(tables, TRUE);

  free(licenses);
}
102 
uint32_hash(gconstpointer v)103 guint uint32_hash (gconstpointer v) {
104   uint32_t u = *(uint32_t*)v;
105   return u;
106 }
107 
uint32_equal(gconstpointer v1,gconstpointer v2)108 gboolean uint32_equal (gconstpointer  v1, gconstpointer  v2) {
109   uint32_t u1 = *(uint32_t*)v1;
110   uint32_t u2 = *(uint32_t*)v2;
111 
112   return u1 == u2;
113 }
114 
g_array_free_true(void * ptr)115 static void g_array_free_true(void* ptr) {
116   g_array_free(ptr, TRUE);
117 }
118 
getKey(const GArray * tokens,unsigned minAdjacentMatches,unsigned searchedStart)119 uint32_t getKey(const GArray* tokens, unsigned minAdjacentMatches, unsigned searchedStart) {
120   uint32_t result = 1;
121   for (guint i = 0; (i < minAdjacentMatches) && (i+searchedStart < tokens->len); i++)
122   {
123     Token* nToken = tokens_index(tokens, i+searchedStart);
124     result = (result << 1) + nToken->hashedContent;
125   }
126 
127   return result;
128 }
129 
buildLicenseIndexes(GArray * licenses,unsigned minAdjacentMatches,unsigned maxLeadingDiff)130 Licenses* buildLicenseIndexes(GArray* licenses, unsigned minAdjacentMatches, unsigned maxLeadingDiff) {
131   Licenses* result = malloc(sizeof(Licenses));
132   if (!result)
133     return NULL;
134 
135 #define is_short(license) ( (license)->tokens->len <= minAdjacentMatches )
136   GArray* shortLicenses = g_array_new(FALSE, FALSE, sizeof(License));
137   for (guint i = 0; i < licenses->len; i++) {
138     License* license = license_index(licenses, i);
139     if (is_short(license)) {
140       g_array_append_val(shortLicenses, *license);
141     }
142   }
143 
144   GArray* indexes = g_array_new(FALSE, FALSE, sizeof(GHashTable*));
145 
146   for (unsigned sPos = 0; sPos <= maxLeadingDiff; sPos++) {
147     GHashTable* index = g_hash_table_new_full(uint32_hash, uint32_equal, free, g_array_free_true);
148     g_array_append_val(indexes, index);
149 
150     for (guint i = 0; i < licenses->len; i++) {
151       License* license = license_index(licenses, i);
152       if (!is_short(license)) {
153         uint32_t* key = malloc(sizeof(uint32_t));
154         *key = getKey(license->tokens, minAdjacentMatches, sPos);
155 
156         GArray* indexedLicenses = g_hash_table_lookup(index, key);
157         if (!indexedLicenses)
158         {
159           indexedLicenses = g_array_new(FALSE, FALSE, sizeof(License));
160           g_hash_table_replace(index, key, indexedLicenses);
161         } else {
162           free(key);
163         }
164         g_array_append_val(indexedLicenses, *license);
165       }
166     }
167   }
168 #undef is_short
169 
170   result->licenses = licenses;
171   result->shortLicenses = shortLicenses;
172   result->indexes = indexes;
173   result->minAdjacentMatches = minAdjacentMatches;
174 
175   return result;
176 }
177 
getShortLicenseArray(const Licenses * licenses)178 const GArray* getShortLicenseArray(const Licenses* licenses) {
179   return licenses->shortLicenses;
180 }
181 
getLicenseArrayFor(const Licenses * licenses,unsigned searchPos,const GArray * searchedTokens,unsigned searchedStart)182 const GArray* getLicenseArrayFor(const Licenses* licenses, unsigned searchPos, const GArray* searchedTokens, unsigned searchedStart) {
183   const GArray* indexes = licenses->indexes;
184 
185   guint minAdjacentMatches = licenses->minAdjacentMatches;
186 
187   if (indexes->len <= searchPos) {
188     return licenses->licenses;
189   }
190 
191   GHashTable* index = g_array_index(indexes, GHashTable*, searchPos);
192   uint32_t key = getKey(searchedTokens, minAdjacentMatches, searchedStart);
193   GArray* result = g_hash_table_lookup(index, &key);
194   return result;
195 }
196