1 /*
2  * This file is part of dependency-check-core.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * Copyright (c) 2013 Jeremy Long. All Rights Reserved.
17  */
18 package org.owasp.dependencycheck.data.cpe;
19 
20 import java.io.IOException;
21 import java.util.HashMap;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.concurrent.atomic.AtomicInteger;
25 import javax.annotation.concurrent.ThreadSafe;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.core.KeywordAnalyzer;
28 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
29 import org.apache.lucene.document.Document;
30 import org.apache.lucene.document.Field;
31 import org.apache.lucene.document.TextField;
32 import org.apache.lucene.index.CorruptIndexException;
33 import org.apache.lucene.index.DirectoryReader;
34 import org.apache.lucene.index.IndexReader;
35 import org.apache.lucene.index.IndexWriter;
36 import org.apache.lucene.index.IndexWriterConfig;
37 import org.apache.lucene.queryparser.classic.ParseException;
38 import org.apache.lucene.queryparser.classic.QueryParser;
39 import org.apache.lucene.search.IndexSearcher;
40 import org.apache.lucene.search.Query;
41 import org.apache.lucene.search.TopDocs;
42 import org.apache.lucene.store.RAMDirectory;
43 import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
44 import org.owasp.dependencycheck.data.nvdcve.CveDB;
45 import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
46 import org.owasp.dependencycheck.utils.Pair;
47 import org.slf4j.Logger;
48 import org.slf4j.LoggerFactory;
49 
50 /**
51  * <p>
52  * An in memory Lucene index that contains the vendor/product combinations from
53  * the CPE (application) identifiers within the NVD CVE data.</p>
54  *
55  * This is the last remaining singleton in dependency-check-core; The use of
56  * this singleton - while it may not technically be thread-safe (one database
57  * used to build this index may not have the same entries as another) the risk
58  * of this is currently believed to be small. As this memory index consumes a
59  * large amount of memory we will remain using the singleton pattern for now.
60  *
61  * @author Jeremy Long
62  */
63 @ThreadSafe
64 public final class CpeMemoryIndex implements AutoCloseable {
65 
66     /**
67      * Singleton instance.
68      */
69     private static final CpeMemoryIndex INSTANCE = new CpeMemoryIndex();
70     /**
71      * The logger.
72      */
73     private static final Logger LOGGER = LoggerFactory.getLogger(CpeMemoryIndex.class);
74     /**
75      * The in memory Lucene index.
76      */
77     private RAMDirectory index;
78     /**
79      * The Lucene IndexReader.
80      */
81     private IndexReader indexReader;
82     /**
83      * The Lucene IndexSearcher.
84      */
85     private IndexSearcher indexSearcher;
86     /**
87      * The Lucene Analyzer used for Searching.
88      */
89     private Analyzer searchingAnalyzer;
90     /**
91      * The Lucene QueryParser used for Searching.
92      */
93     private QueryParser queryParser;
94     /**
95      * Track the number of current users of the Lucene index; used to track it
96      * it is okay to actually close the index.
97      */
98     private final AtomicInteger usageCount = new AtomicInteger(0);
99 
100     /**
101      * private constructor for singleton.
102      */
CpeMemoryIndex()103     private CpeMemoryIndex() {
104     }
105 
106     /**
107      * Gets the singleton instance of the CpeMemoryIndex.
108      *
109      * @return the instance of the CpeMemoryIndex
110      */
getInstance()111     public static CpeMemoryIndex getInstance() {
112         return INSTANCE;
113     }
114 
115     /**
116      * Creates and loads data into an in memory index.
117      *
118      * @param cve the data source to retrieve the cpe data
119      * @throws IndexException thrown if there is an error creating the index
120      */
open(CveDB cve)121     public synchronized void open(CveDB cve) throws IndexException {
122         if (INSTANCE.usageCount.addAndGet(1) == 1) {
123             index = new RAMDirectory();
124             buildIndex(cve);
125             try {
126                 indexReader = DirectoryReader.open(index);
127             } catch (IOException ex) {
128                 throw new IndexException(ex);
129             }
130             indexSearcher = new IndexSearcher(indexReader);
131             searchingAnalyzer = createSearchingAnalyzer();
132             queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
133         }
134     }
135 
136     /**
137      * returns whether or not the index is open.
138      *
139      * @return whether or not the index is open
140      */
isOpen()141     public synchronized boolean isOpen() {
142         return INSTANCE.usageCount.get() > 0;
143     }
144 
145     /**
146      * Creates an Analyzer for searching the CPE Index.
147      *
148      * @return the CPE Analyzer.
149      */
createSearchingAnalyzer()150     private Analyzer createSearchingAnalyzer() {
151         final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
152         fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
153         final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer();
154         final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer();
155         fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
156         fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
157 
158         return new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), fieldAnalyzers);
159     }
160 
161     /**
162      * Closes the CPE Index.
163      */
164     @Override
close()165     public synchronized void close() {
166         final int count = INSTANCE.usageCount.get() - 1;
167         if (count <= 0) {
168             INSTANCE.usageCount.set(0);
169             if (searchingAnalyzer != null) {
170                 searchingAnalyzer.close();
171                 searchingAnalyzer = null;
172             }
173             if (indexReader != null) {
174                 try {
175                     indexReader.close();
176                 } catch (IOException ex) {
177                     LOGGER.trace("", ex);
178                 }
179                 indexReader = null;
180             }
181             queryParser = null;
182             indexSearcher = null;
183             if (index != null) {
184                 index.close();
185                 index = null;
186             }
187         }
188     }
189 
190     /**
191      * Builds the CPE Lucene Index based off of the data within the CveDB.
192      *
193      * @param cve the data base containing the CPE data
194      * @throws IndexException thrown if there is an issue creating the index
195      */
buildIndex(CveDB cve)196     private void buildIndex(CveDB cve) throws IndexException {
197         try (Analyzer analyzer = createSearchingAnalyzer();
198                 IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
199             // Tip: reuse the Document and Fields for performance...
200             // See "Re-use Document and Field instances" from
201             // http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
202             final Document doc = new Document();
203             final Field v = new TextField(Fields.VENDOR, Fields.VENDOR, Field.Store.YES);
204             final Field p = new TextField(Fields.PRODUCT, Fields.PRODUCT, Field.Store.YES);
205             doc.add(v);
206             doc.add(p);
207 
208             final Set<Pair<String, String>> data = cve.getVendorProductList();
209             for (Pair<String, String> pair : data) {
210                 if (pair.getLeft() != null && pair.getRight() != null) {
211                     v.setStringValue(pair.getLeft());
212                     p.setStringValue(pair.getRight());
213                     indexWriter.addDocument(doc);
214                 }
215             }
216             indexWriter.commit();
217             indexWriter.close();
218         } catch (DatabaseException ex) {
219             LOGGER.debug("", ex);
220             throw new IndexException("Error reading CPE data", ex);
221         } catch (IOException ex) {
222             throw new IndexException("Unable to close an in-memory index", ex);
223         }
224     }
225 
226     /**
227      * Searches the index using the given search string.
228      *
229      * @param searchString the query text
230      * @param maxQueryResults the maximum number of documents to return
231      * @return the TopDocs found by the search
232      * @throws ParseException thrown when the searchString is invalid
233      * @throws IOException is thrown if there is an issue with the underlying
234      * Index
235      */
search(String searchString, int maxQueryResults)236     public synchronized TopDocs search(String searchString, int maxQueryResults) throws ParseException, IOException {
237         if (searchString == null || searchString.trim().isEmpty()) {
238             throw new ParseException("Query is null or empty");
239         }
240         LOGGER.debug(searchString);
241         final Query query = queryParser.parse(searchString);
242         return search(query, maxQueryResults);
243     }
244 
245     /**
246      * Searches the index using the given query.
247      *
248      * @param query the query used to search the index
249      * @param maxQueryResults the max number of results to return
250      * @return the TopDocs found be the query
251      * @throws CorruptIndexException thrown if the Index is corrupt
252      * @throws IOException thrown if there is an IOException
253      */
search(Query query, int maxQueryResults)254     public synchronized TopDocs search(Query query, int maxQueryResults) throws CorruptIndexException, IOException {
255         return indexSearcher.search(query, maxQueryResults);
256     }
257 
258     /**
259      * Retrieves a document from the Index.
260      *
261      * @param documentId the id of the document to retrieve
262      * @return the Document
263      * @throws IOException thrown if there is an IOException
264      */
getDocument(int documentId)265     public synchronized Document getDocument(int documentId) throws IOException {
266         return indexSearcher.doc(documentId);
267     }
268 
269     /**
270      * Returns the number of CPE entries stored in the index.
271      *
272      * @return the number of CPE entries stored in the index
273      */
numDocs()274     public synchronized int numDocs() {
275         if (indexReader == null) {
276             return -1;
277         }
278         return indexReader.numDocs();
279     }
280 }
281