1 /*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30
31 #include "ColliderCorporaParser.h"
32 #include "FileCorpora.h"
33 #include "common.h"
34 #include "util/expression_path.h"
35
36 #include <iostream>
37 #include <fstream>
38
39 #include <boost/algorithm/string/trim.hpp>
40
41 using namespace std;
42
// Tells the caller whether a line carries no corpus data: either it has no
// characters at all, or its very first character is '#' (a comment line).
// No whitespace is stripped here; a line with leading blanks before the '#'
// is not treated as a comment.
static
bool emptyLine(const std::string &line) {
    if (line.empty()) {
        return true;
    }
    return line.front() == '#';
}
48
clone() const49 FileCorpora *FileCorpora::clone() const {
50 FileCorpora *copy = new FileCorpora();
51 copy->corpora_by_pat = corpora_by_pat;
52 return copy;
53 }
54
readLine(const string & line)55 bool FileCorpora::readLine(const string &line) {
56 unsigned id = 0;
57 Corpus c;
58 bool rv = parseCorpus(line, c, id);
59 if (rv) {
60 corpora_by_pat[id].push_back(c);
61 return true;
62 } else {
63 return false;
64 }
65 }
66
readFile(const string & filename)67 bool FileCorpora::readFile(const string &filename) {
68 ifstream f(filename.c_str());
69 if (!f.good()) {
70 return false;
71 }
72
73 unsigned lineNum = 0;
74 string line;
75 while (getline(f, line)) {
76 lineNum++;
77
78 boost::trim(line);
79
80 if (emptyLine(line)) {
81 continue;
82 }
83 if (!readLine(line)) {
84 cerr << "Error in corpora file parsing line " << lineNum << endl;
85 return false;
86 }
87 }
88 return !corpora_by_pat.empty();
89 }
90
generate(unsigned id,vector<Corpus> & data)91 void FileCorpora::generate(unsigned id,
92 vector<Corpus> &data) {
93 auto i = corpora_by_pat.find(id);
94 if (i == corpora_by_pat.end() || i->second.empty()) {
95 throw CorpusFailure("no corpora found for pattern.");
96 }
97
98 data.insert(data.end(), i->second.begin(), i->second.end());
99 }
100