1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2  * Use of this file is governed by the BSD 3-clause license that
3  * can be found in the LICENSE.txt file in the project root.
4  */
5 
6 #include "atn/ATN.h"
7 #include "atn/ATNDeserializer.h"
8 #include "Vocabulary.h"
9 
10 #include "misc/InterpreterDataReader.h"
11 
12 using namespace antlr4::dfa;
13 using namespace antlr4::atn;
14 using namespace antlr4::misc;
15 
InterpreterData(std::vector<std::string> const & literalNames,std::vector<std::string> const & symbolicNames)16 InterpreterData::InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames)
17 : vocabulary(literalNames, symbolicNames) {
18 }
19 
parseFile(std::string const & fileName)20 InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
21   // The structure of the data file is very simple. Everything is line based with empty lines
22   // separating the different parts. For lexers the layout is:
23   // token literal names:
24   // ...
25   //
26   // token symbolic names:
27   // ...
28   //
29   // rule names:
30   // ...
31   //
32   // channel names:
33   // ...
34   //
35   // mode names:
36   // ...
37   //
38   // atn:
39   // <a single line with comma separated int values> enclosed in a pair of squared brackets.
40   //
41   // Data for a parser does not contain channel and mode names.
42 
43   std::ifstream input(fileName);
44   if (!input.good())
45     return {};
46 
47   std::vector<std::string> literalNames;
48   std::vector<std::string> symbolicNames;
49 
50   std::string line;
51 
52   std::getline(input, line, '\n');
53   assert(line == "token literal names:");
54   while (true) {
55     std::getline(input, line, '\n');
56     if (line.empty())
57       break;
58 
59     literalNames.push_back(line == "null" ? "" : line);
60   };
61 
62   std::getline(input, line, '\n');
63   assert(line == "token symbolic names:");
64   while (true) {
65     std::getline(input, line, '\n');
66     if (line.empty())
67       break;
68 
69     symbolicNames.push_back(line == "null" ? "" : line);
70   };
71   InterpreterData result(literalNames, symbolicNames);
72 
73   std::getline(input, line, '\n');
74   assert(line == "rule names:");
75   while (true) {
76     std::getline(input, line, '\n');
77     if (line.empty())
78       break;
79 
80     result.ruleNames.push_back(line);
81   };
82 
83   std::getline(input, line, '\n');
84   if (line == "channel names:") {
85     while (true) {
86       std::getline(input, line, '\n');
87       if (line.empty())
88         break;
89 
90       result.channels.push_back(line);
91     };
92 
93     std::getline(input, line, '\n');
94     assert(line == "mode names:");
95     while (true) {
96       std::getline(input, line, '\n');
97       if (line.empty())
98         break;
99 
100       result.modes.push_back(line);
101     };
102   }
103 
104   std::vector<uint16_t> serializedATN;
105 
106   std::getline(input, line, '\n');
107   assert(line == "atn:");
108   std::getline(input, line, '\n');
109   std::stringstream tokenizer(line);
110   std::string value;
111   while (tokenizer.good()) {
112     std::getline(tokenizer, value, ',');
113     unsigned long number;
114     if (value[0] == '[')
115       number = std::strtoul(&value[1], nullptr, 10);
116     else
117       number = std::strtoul(value.c_str(), nullptr, 10);
118     serializedATN.push_back(static_cast<uint16_t>(number));
119   }
120 
121   ATNDeserializer deserializer;
122   result.atn = deserializer.deserialize(serializedATN);
123   return result;
124 }
125