1 /*
2 # Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
3 #
4 # By using this file, you agree to the terms and conditions set forth in
5 # the LICENSE.html file which can be found at the top level of the ViewVC
6 # distribution or at http://viewvc.org/license-1.html.
7 #
8 # For more information, visit http://viewvc.org/
9 #
10 # -----------------------------------------------------------------------
11 #
12 # This file has been rewritten in C++ from the rcsparse.py file by
13 # Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
14 #
15 # This file was originally based on portions of the blame.py script by
16 # Curt Hagenlocher.
17 #
18 # -----------------------------------------------------------------------
19 */
20 
21 /*
22    This C++ library offers an API to a performance-oriented RCSFILE parser.
23    It does little syntax checking.
24 
25    Version: $Id$
26  */
27 
28 #ifndef __PARSE_H
29 #define __PARSE_H
#include <algorithm>  /* for iterator */
#include <cstdlib>    /* for free */
#include <cstring>    /* for strlen, memcmp */
#include <exception>  /* for exception */
#include <istream>    /* for istream */
#include <list>       /* for list<> */
#include <memory>     /* for auto_ptr */
#include <string>     /* for string */
36 
37 
/* Size in bytes of TokenParser's internal buffer, and the number of
   bytes its constructor requests from the input stream. */
#define CHUNK_SIZE 30000
/* Defaults for the `size' and `delta' arguments of the rcstoken
   constructor that takes no initial data. */
#define DEFAULT_TOKEN_SIZE 512
#define DEFAULT_TOKEN_DELTA 10240

/* Boolean constants, defined only when the including code has not
   already provided them. */
#ifndef FALSE
#define FALSE (0 != 0)
#endif

#ifndef TRUE
#define TRUE (0 == 0)
#endif

/* NOTE(review): a using-directive at header scope leaks into every file
   that includes this header.  Declarations in this header use
   unqualified standard names (for example `list'), so it cannot simply
   be removed. */
using namespace std;
51 
/* This class represents an exception that occurred during the parsing
   of a file.  The human-readable message is kept in `value' and is also
   reported through what(). */

class RCSParseError : public std::exception
{
  public:
    /* Description of the error; empty when none was supplied. */
    std::string value;

    /* Build an error that carries no message. */
    RCSParseError() {}

    /* Build an error carrying a copy of MYVALUE.  A NULL pointer is
       stored as an empty message, because constructing or assigning a
       std::string from a null pointer is undefined behaviour. */
    RCSParseError(const char *myvalue)
      : value(myvalue ? myvalue : "")
    {
    }

    virtual ~RCSParseError() throw() {}

    /* Return the message, so that generic `catch (std::exception &)'
       handlers can report it.  `value' is read at call time, so messages
       assigned by derived classes after construction are honoured.  When
       no message is set, fall back to the base class text. */
    virtual const char *what() const throw()
    {
      return value.empty() ? std::exception::what() : value.c_str();
    }
};
66 
67 class RCSIllegalCharacter : public RCSParseError
68 {
69   public:
RCSIllegalCharacter(const char * myvalue)70     RCSIllegalCharacter(const char *myvalue)
71     {
72       value = myvalue;
73     };
~RCSIllegalCharacter()74     virtual ~RCSIllegalCharacter() throw() {};
75 };
76 
77 class RCSExpected : public RCSParseError
78 {
79   public:
80     string got;
81     string wanted;
RCSExpected(const char * mygot,const char * mywanted)82     RCSExpected(const char *mygot, const char *mywanted)
83     {
84       got = mygot;
85       wanted = mywanted;
86     };
RCSExpected(const char * mygot,const char c)87     RCSExpected(const char *mygot, const char c)
88     {
89       got = mygot;
90       wanted = c;
91     };
~RCSExpected()92     virtual ~RCSExpected() throw() {};
93 };
94 
95 class rcstoken
96 {
97  public:
98   size_t length, size, delta;
99   char *data;
100 
101  public:
rcstoken(const char * mydata,size_t mylen)102   rcstoken(const char *mydata, size_t mylen)
103   {
104     init(mydata, mylen);
105   };
rcstoken(const char * mydata)106   rcstoken(const char *mydata)
107     {
108       init(mydata, strlen(mydata));
109     };
110   rcstoken(size_t mysize = DEFAULT_TOKEN_SIZE,
111            size_t mydelta = DEFAULT_TOKEN_DELTA)
112     {
113       data = NULL;
114       size = mysize;
115       length = 0;
116       delta = mydelta;
117     };
~rcstoken()118   ~rcstoken()
119     {
120       if (data)
121         free(data);
122       data = NULL;
123     };
124   void init(const char *mydata, size_t mylen);
null_token()125   int null_token()
126     {
127       return data == NULL;
128     };
129   rcstoken& operator=(const char b)
130     {
131       grow(2);
132       length = 1;
133       data[0] = b;
134       data[1] = 0;
135 
136       return *this;
137     };
138   rcstoken& operator+=(const char b)
139     {
140       append(b);
141 
142       return *this;
143     };
144   rcstoken& operator+=(rcstoken& token)
145     {
146       append(token);
147 
148       return *this;
149     };
150   int operator==(const char *b)
151     {
152       size_t b_len;
153       return data && b && length == (b_len = strlen(b)) &&
154         memcmp(data, b, (b_len<length) ? b_len : length) == 0;
155     };
156   int operator!=(const char *b)
157     {
158       return (! (*this == b));
159     };
160   int operator==(const char b)
161     {
162       return (length == 1) && data && (*data == b);
163     };
164   int operator!=(const char b)
165     {
166       return (! (*this==b));
167     };
168   char operator[](size_t i)
169     {
170       return data[i];
171     };
172   void append(const char *b, size_t b_len);
append(const char b)173   void append(const char b)
174     {
175       grow(length+2);
176       data[length] = b;
177       data[length++] = 0;
178     };
append(rcstoken & token)179   void append(rcstoken& token)
180     {
181       append(token.data, token.length);
182     };
183   void grow(size_t new_size);
184   rcstoken *copy_begin_end(size_t begin, size_t end);
185   rcstoken *copy_begin_len(size_t begin, size_t len);
186 };
187 
/* A list of tokens held by value, and its iterator type.  Used for the
   `branches' argument of Sink::define_revision().
   NOTE(review): elements are rcstoken copies; check rcstoken's copy
   behaviour before adding elements. */
typedef list<rcstoken> tokenlist;
typedef tokenlist::iterator tokenlist_iter;
190 
191 
192 
193 /* This class is a handler that receive the event generated by the parser
194    i.e.: When we reach the head revision tag, etc... */
195 class Sink
196 {
197   public:
Sink()198     Sink() {};
~Sink()199     virtual ~Sink() throw () {};
200     virtual void set_head_revision(rcstoken &revision) = 0;
201     virtual void set_principal_branch(rcstoken &branch_name) = 0;
202     virtual void define_tag(rcstoken &name, rcstoken &revision) = 0;
203     virtual void set_comment(rcstoken &comment) = 0;
204     virtual void set_description(rcstoken &description) = 0;
205     virtual void define_revision(rcstoken &revision, long timestamp,
206                                  rcstoken &author, rcstoken &state,
207                                  tokenlist &branches, rcstoken &next) = 0;
208     virtual void set_revision_info(rcstoken &revision,
209                                    rcstoken &log, rcstoken &text) = 0;
210     virtual void tree_completed() = 0;
211     virtual void parse_completed() = 0;
212 };
213 
214 /* The class is used to get one by one every token in the file. */
215 class TokenParser
216 {
217   private:
218     istream *input;
219     char buf[CHUNK_SIZE];
220     int buflength;
221     int idx;
222     rcstoken *backget;
223   public:
224     rcstoken *get(int allow_eof);
225     void unget(rcstoken *token);
eof()226     int eof()
227     {
228       return (input->gcount() == 0);
229     };
match(const char * token)230     void match(const char *token)
231     {
232       auto_ptr<rcstoken> ptr(get(FALSE));
233       if (*ptr != token)
234         throw RCSExpected(ptr->data, token);
235     }
match(const char c)236     void match(const char c)
237     {
238       auto_ptr<rcstoken> token(get(FALSE));
239 
240       if ((*token) != c)
241         throw RCSExpected(token->data, c);
242     };
TokenParser(istream * myinput)243     TokenParser(istream *myinput)
244     {
245       input = myinput;
246       backget = NULL;
247       idx = 0;
248       input->read(buf, CHUNK_SIZE);
249       if ( (buflength = input->gcount()) == 0 )
250         throw RCSParseError("Non-existing file or empty file");
251     };
~TokenParser()252     ~TokenParser()
253     {
254       if (input != NULL)
255       {
256         delete input;
257         input = NULL;
258       };
259       if (backget != NULL)
260       {
261         delete backget;
262         backget = NULL;
263       };
264     };
265 };
266 
267 /* this is the class that does the actual job: by reading each part of
268    the file and thus generate events to a sink event-handler*/
269 class tparseParser
270 {
271   private:
272     TokenParser *tokenstream;
273     Sink *sink;
274     void parse_rcs_admin();
275     void parse_rcs_tree();
276     void parse_rcs_description();
277     void parse_rcs_deltatext();
278   public:
tparseParser(istream * myinput,Sink * mysink)279     tparseParser(istream *myinput, Sink* mysink)
280     {
281       sink = mysink;
282       tokenstream = new TokenParser(myinput);
283     }
parse()284     void parse()
285     {
286       parse_rcs_admin();
287       parse_rcs_tree();
288 
289       // many sinks want to know when the tree has been completed so they can
290       // do some work to prepare for the arrival of the deltatext
291       sink->tree_completed();
292 
293       parse_rcs_description();
294       parse_rcs_deltatext();
295       // easiest for us to tell the sink it is done, rather than worry about
296       // higher level software doing it.
297       sink->parse_completed();
298     }
~tparseParser()299     ~tparseParser()
300     {
301       delete tokenstream;
302       delete sink;
303     }
304 };
305 
306 #endif /* __PARSE_H */
307