1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
#include "config.h"

#include "ColliderCorporaParser.h"
#include "FileCorpora.h"
#include "common.h"
#include "util/expression_path.h"

#include <fstream>
#include <iostream>
#include <memory>

#include <boost/algorithm/string/trim.hpp>
40 
41 using namespace std;
42 
// Tells the file reader whether a line should be skipped: true for a line
// with no characters at all, or one whose first character is '#' (a comment).
// Only the very first character is inspected, so leading whitespace must be
// stripped by the caller for an indented comment to be recognised.
static
bool emptyLine(const string &line) {
    if (line.empty()) {
        return true;
    }
    return line.front() == '#';
}
48 
clone() const49 FileCorpora *FileCorpora::clone() const {
50     FileCorpora *copy = new FileCorpora();
51     copy->corpora_by_pat = corpora_by_pat;
52     return copy;
53 }
54 
readLine(const string & line)55 bool FileCorpora::readLine(const string &line) {
56     unsigned id = 0;
57     Corpus c;
58     bool rv = parseCorpus(line, c, id);
59     if (rv) {
60         corpora_by_pat[id].push_back(c);
61         return true;
62     } else {
63         return false;
64     }
65 }
66 
readFile(const string & filename)67 bool FileCorpora::readFile(const string &filename) {
68     ifstream f(filename.c_str());
69     if (!f.good()) {
70         return false;
71     }
72 
73     unsigned lineNum = 0;
74     string line;
75     while (getline(f, line)) {
76         lineNum++;
77 
78         boost::trim(line);
79 
80         if (emptyLine(line)) {
81             continue;
82         }
83         if (!readLine(line)) {
84             cerr << "Error in corpora file parsing line " << lineNum << endl;
85             return false;
86         }
87     }
88     return !corpora_by_pat.empty();
89 }
90 
generate(unsigned id,vector<Corpus> & data)91 void FileCorpora::generate(unsigned id,
92                            vector<Corpus> &data) {
93     auto i = corpora_by_pat.find(id);
94     if (i == corpora_by_pat.end() || i->second.empty()) {
95         throw CorpusFailure("no corpora found for pattern.");
96     }
97 
98     data.insert(data.end(), i->second.begin(), i->second.end());
99 }
100