1 #include "cpp_compress.hh"
2
3 #include <vector>
4 #include <map>
5 #include <set>
6 #include <iostream>
7
8 #include <stdio.h>
9
10 namespace
11 {
12 std::string macro_prefix_chars = "qghdowam";
13 bool verbose = false;
14
15 /* ^List of characters that are _least_ likely to form
16 * a legitimate identifier name in the input code when
17 * a [0-9A-Z] (case sensitive) is appended to it.
18 */
19
20 std::set<std::string> parametric_macro_list;
21
reset_parametric_macro_list()22 void reset_parametric_macro_list()
23 {
24 static const char* const list[] =
25 {
26 /* IMPORTANT NOTE: HARDCODED LIST OF ALL PREPROCESSOR
27 * MACROS THAT TAKE PARAMETERS. ANY EXTERNAL MACROS
28 * NOT LISTED HERE WILL BE BROKEN AFTER COMPRESSION.
29 * MACROS DEFINED WITHIN THE PROCESSED INPUT ARE FINE.
30 */
31 "assert", "getc", "putc",
32 "FP_TRACE_OPCODENAME",
33 "FP_TRACE_BYTECODE_OPTIMIZATION",
34 "FP_TRACE_BYTECODE_ADD",
35 "N","P", "DBL_ONLY", "LNG_ONLY",
36 "incStackPtr", "TryCompilePowi","findName"
37 };
38 parametric_macro_list.clear();
39 for(unsigned p=0; p<sizeof(list)/sizeof(*list); ++p)
40 parametric_macro_list.insert(list[p]);
41 }
42
43 struct length_rec
44 {
45 unsigned begin_index;
46 unsigned num_tokens;
47 unsigned num_occurrences;
48 };
49
isnamechar(char c)50 inline bool isnamechar(char c)
51 {
52 switch(c)
53 {
54 case '0':case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
55 case '1':case 'g':case 'h':case 'i':case 'j':case 'k':case 'l':
56 case '2':case 'm':case 'n':case 'o':case 'p':case 'q':case 'r':
57 case '3':case 's':case 't':case 'u':case 'v':case 'w':case 'x':
58 case '4':case 'y':case 'z':case '_':
59 case '5':case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
60 case '6':case 'G':case 'H':case 'I':case 'J':case 'K':case 'L':
61 case '7':case 'M':case 'N':case 'O':case 'P':case 'Q':case 'R':
62 case '8':case 'S':case 'T':case 'U':case 'V':case 'W':case 'X':
63 case '9':case 'Y':case 'Z': return true;
64 }
65 return false;
66 }
67
68 class StringSeq: private std::string
69 {
70 private:
71 std::vector<bool> sticky;
72 public:
StringSeq()73 StringSeq()
74 : std::string(), sticky() { }
75
StringSeq(const std::string & s)76 StringSeq(const std::string& s)
77 : std::string(s), sticky() { }
78
GetString() const79 const std::string& GetString() const { return *this; }
80
81 template<typename T>
operator +=(const T & k)82 void operator += (const T& k)
83 {
84 std::string::operator+=(k);
85 }
operator [](size_t pos) const86 char operator[] (size_t pos) const
87 {
88 return std::string::operator[] (pos);
89 }
90
SetSticky(size_t n_last_chars)91 void SetSticky(size_t n_last_chars)
92 {
93 if(sticky.size() < size()) sticky.resize(size());
94 for(size_t p = size() - n_last_chars; p < size(); ++p)
95 sticky[p] = true;
96 }
IsSticky(size_t index) const97 bool IsSticky(size_t index) const
98 {
99 return index < sticky.size() && sticky[index];
100 }
HasSticky() const101 bool HasSticky() const { return !sticky.empty(); }
102
substr(size_t begin,size_t count=std::string::npos) const103 StringSeq substr(size_t begin, size_t count = std::string::npos) const
104 {
105 StringSeq result;
106 result.std::string::assign(*this, begin,count);
107 result.sticky.assign(
108 begin < sticky.size()
109 ? sticky.begin() + begin
110 : sticky.end(),
111 (count != std::string::npos && begin+count < sticky.size())
112 ? sticky.begin() + begin + count
113 : sticky.end());
114 while(!result.sticky.empty() && !result.sticky.back())
115 result.sticky.pop_back();
116 return result;
117 }
118
119 using std::string::empty;
120 using std::string::size;
121 using std::string::append;
122 };
123
124 struct token
125 {
126 std::string value;
127 struct token_meta
128 {
129 unsigned hash;
130 int balance;
131 unsigned comma;
132 bool preproc;
133 } meta;
134
token__anond5476afc0111::token135 token(const std::string& v) : value(v)
136 {
137 Rehash();
138 }
139
token__anond5476afc0111::token140 token(const StringSeq& v) : value(v.GetString())
141 {
142 Rehash();
143 if(v.HasSticky())
144 meta.preproc = true;
145 }
146
operator =__anond5476afc0111::token147 void operator=(const std::string& v) { value=v; Rehash(); }
148
operator ==__anond5476afc0111::token149 bool operator==(const token& b) const
150 {
151 return meta.hash == b.meta.hash
152 && value == b.value
153 && meta.preproc == b.meta.preproc;
154 }
155
operator !=__anond5476afc0111::token156 bool operator!=(const token& b) const
157 { return !operator==(b); }
158
Rehash__anond5476afc0111::token159 void Rehash()
160 {
161 const char* v = value.data();
162 meta.preproc = (v[0] == '#' || v[0] == '\n');
163 meta.balance = 0;
164 meta.comma = 0;
165 meta.hash = 0;
166 for(size_t a=0; a<value.size(); ++a)
167 {
168 //meta.hash = meta.hash*0x8088405u + v[a];
169 meta.hash = ((meta.hash << (1)) | (meta.hash >> (32-1))) - v[a];
170 switch(v[a])
171 {
172 case '(': ++meta.balance; break;
173 case ')': --meta.balance; break;
174 case ',': ++meta.comma; break;
175 }
176 }
177 }
swap__anond5476afc0111::token178 void swap(token& b)
179 {
180 value.swap(b.value);
181 std::swap(meta, b.meta);
182 }
183 };
184 bool Debug = false;
GetSeq(std::vector<token>::const_iterator begin,size_t n,bool NewLines)185 StringSeq GetSeq(std::vector<token>::const_iterator begin,
186 size_t n, bool NewLines)
187 {
188 bool InDefineMode = false;
189
190 /* Resequence the input */
191 StringSeq result;
192 int quotemode = 0;
193 while(n-- > 0)
194 {
195 const token& tok = *begin;
196 const std::string& value = tok.value; ++begin;
197
198 if(Debug)
199 result += tok.meta.preproc
200 ? (InDefineMode ? "\xBF" : "\xA1") // upside-down ? and !
201 : (InDefineMode ? "\x3F" : "\x21"); // ?, !
202 else if (!result.empty() && result[result.size()-1] != '\n')
203 {
204 if (!InDefineMode && NewLines && value[0] == '#') result += '\n';
205 else if (isnamechar(value[0])
206 && (isnamechar(result[result.size()-1])
207 || result[result.size()-1] == '"'
208 || result[result.size()-1] == '\''
209 // || result[result.size()-1]==')'
210 /* An identifier preceded by an identifier character
211 * requires a separator. Also, an identifier
212 * preceded by a macro that expands to something
213 * that ends in an identifier character requires a
214 * separator when using Microsoft C++, thus you may
215 * need to include the ")" check above sometimes.
216 * Also, in C++11 you can't tack an identifier right
217 * after a string or character constant or there will
218 * be an error.
219 */
220 ))
221 {
222 if (!NewLines || InDefineMode)
223 result += ' ';
224 else
225 result += '\n';
226 }
227 }
228 if (value[0] == '#')
229 {
230 if (value.substr(0,7) == "#define") InDefineMode = true;
231 if (value.substr(0,8) == "#include") InDefineMode = true;
232 }
233 if (value == "\n")
234 {
235 if (InDefineMode) { result += "\n"; InDefineMode = false; }
236 continue;
237 }
238
239 switch(quotemode)
240 {
241 case 0: // prev wasn't a quote, or this is not a quote
242 if (value[0] == '"'
243 && (n>0 && begin->value[0] == '"')) // this and next are quotes
244 { quotemode = 1;
245 result.append(value, 0, value.size()-1);
246 //result += value.substr(0, value.size()-1);
247 continue;
248 }
249 else
250 {
251 result += value;
252 if(tok.meta.preproc || (value[0] == '(' && value.size() > 1))
253 {
254 /* A macro parameter list is an undivisible entity */
255 result.SetSticky(value.size());
256 }
257 }
258 break;
259 case 1: // prev was a quote, skip this quote
260 if (n>0 && begin->value[0] == '"')
261 { //result += value.substr(1, value.size()-2);
262 result.append(value, 1, value.size()-2);
263 continue;
264 }
265 else
266 { quotemode = 0;
267 //result += value.substr(1);
268 result.append(value, 1, value.size()-1);
269 }
270 break;
271 }
272 if (!Debug)
273 {
274 if (NewLines && !InDefineMode)
275 {
276 if (value[0] == '#'
277 || value[0] == '}'
278 || value[0] == '"'
279 )
280 {
281 result += '\n';
282 }
283 }
284 if (n > 0 && (value.size() == 1 &&
285 (value[0] == '<' // < < is not <<
286 || value[0] == '>' // > > is not >>
287 || value[0] == '+' // + + is not ++
288 || value[0] == '-' // - - is not --
289 || value[0] == '&') // & & is not &&
290 ) && value == begin->value)
291 {
292 result += ' ';
293 }
294 }
295 }
296 return result;
297 }
298
299 struct DefineParsingMode
300 {
301 bool Active;
302 std::set<std::string> ParamList;
303
DefineParsingMode__anond5476afc0111::DefineParsingMode304 DefineParsingMode() : Active(false), ParamList() { }
305
Activate__anond5476afc0111::DefineParsingMode306 void Activate() { Active=true; }
Deactivate__anond5476afc0111::DefineParsingMode307 void Deactivate() { Active=false; ParamList.clear(); }
308 };
309
310 std::vector<token>
Tokenize(const StringSeq & input,DefineParsingMode & defmode,bool SplitMacros,bool SplitStrings)311 Tokenize(const StringSeq& input,
312 DefineParsingMode& defmode,
313 bool SplitMacros,
314 bool SplitStrings)
315 {
316 std::vector<token> result;
317 size_t a=0, b=input.size();
318 while(a < b)
319 {
320 /*if(input.IsSticky(a))
321 {
322 size_t eat = 1;
323 while(input.IsSticky(a+eat)) ++eat;
324 result.push_back( input.substr(a, eat) );
325 a += eat;
326 continue;
327 }*/
328 if (defmode.Active && input[a]=='\\' && input[a+1] == '\n')
329 {
330 a += 2;
331 continue;
332 }
333 if (defmode.Active && input[a]=='\n')
334 {
335 defmode.Deactivate();
336 result.push_back( token("\n") );
337 ++a;
338 continue;
339 }
340 if (input[a]==' ' || input[a]=='\t'
341 || input[a]=='\n' || input[a]=='\r') { ++a; continue; }
342
343 if (input[a]=='/' && input[a+1]=='*')
344 {
345 a += 2;
346 while(a < b && (input[a-2]!='*' || input[a-1]!='/')) ++a;
347 continue;
348 }
349 if (input[a]=='/' && input[a+1]=='/')
350 {
351 while(a < b && input[a]!='\n') ++a;
352 continue;
353 }
354 if (input[a]=='_' || (input[a]>='a' && input[a]<='z')
355 || (input[a]>='A' && input[a]<='Z'))
356 {
357 size_t name_begin = a;
358 while(++a < b)
359 {
360 if (isnamechar(input[a])) continue;
361 break;
362 }
363
364 std::string name(input.GetString(), name_begin, a-name_begin);
365 result.push_back(name);
366
367 if (defmode.Active && defmode.ParamList.find(name)
368 != defmode.ParamList.end())
369 {
370 // Define params are immutable.
371 result.back().meta.preproc = true;
372 }
373
374 if (input[a] == '('
375 && parametric_macro_list.find(name)
376 != parametric_macro_list.end())
377 {
378 std::vector<token> remains = Tokenize(input.substr(a), defmode, SplitMacros, SplitStrings);
379 int balance = 1;
380 size_t eat = 1;
381 for(; eat < remains.size() && balance != 0; ++eat)
382 balance += remains[eat].meta.balance;
383 if(SplitMacros)
384 {
385 for(size_t c=0; c<eat; ++c)
386 if(remains[c].meta.balance != 0
387 || remains[c].meta.comma != 0)
388 remains[c].meta.preproc = true;
389 result.insert(result.end(), remains.begin(), remains.end());
390 }
391 else
392 {
393 //result.push_back( GetSeq(remains.begin(), eat, false) );
394 StringSeq tmp = GetSeq(remains.begin(), eat, false);
395 result.back() = result.back().value + tmp.GetString();
396 result.insert(result.end(), remains.begin()+eat, remains.end());
397 }
398 a = b; // done
399 }
400 continue;
401 }
402 if (std::isdigit(input[a]) ||
403 (input[a] == '.' && std::isdigit(input[a+1])))
404 {
405 size_t value_begin = a;
406 while(++a < b)
407 {
408 if ((input[a]>='0' && input[a]<='9')
409 || input[a]=='.' || input[a]=='+' || input[a]=='-'
410 || input[a]=='x' || (input[a]>='a' && input[a]<='f')
411 || input[a]=='p' || (input[a]>='A' && input[a]<='F')
412 || input[a]=='u' || input[a]=='U'
413 || input[a]=='l' || input[a]=='f'
414 || input[a]=='L' || input[a]=='F') continue;
415 break;
416 }
417 StringSeq s = input.substr(value_begin, a-value_begin );
418 /* TODO: Convert hex to decimal */
419 result.push_back( s );
420 continue;
421 }
422 if (a+1 < b && input[a] == '>' && input[a+1] == '>')
423 { int n = (a+2 < b && input[a+2] == '=') ? 3 : 2;
424 result.push_back(input.substr(a, n)); a += n; continue; }
425 if (a+1 < b && input[a] == '<' && input[a+1] == '<')
426 { int n = (a+2 < b && input[a+2] == '=') ? 3 : 2;
427 result.push_back(input.substr(a, n)); a += n; continue; }
428 if (a+1 < b && input[a] == '+' && input[a+1] == '+')
429 { result.push_back(input.substr(a, 2)); a += 2; continue; }
430 if (a+1 < b && input[a] == '-' && input[a+1] == '-')
431 { result.push_back(input.substr(a, 2)); a += 2; continue; }
432 if (a+1 < b && input[a] == '-' && input[a+1] == '>')
433 { result.push_back(input.substr(a, 2)); a += 2; continue; }
434 if (a+1 < b && input[a] == '#' && input[a+1] == '#')
435 { result.push_back(input.substr(a, 2)); a += 2; continue; }
436 if (a+1 < b && input[a] == '&' && input[a+1] == '&')
437 { result.push_back(input.substr(a, 2)); a += 2; continue; }
438 if (a+1 < b && input[a] == '|' && input[a+1] == '|')
439 { result.push_back(input.substr(a, 2)); a += 2; continue; }
440 if (a+1 < b && (input[a] == '>' || input[a] == '<'
441 || input[a] == '!' || input[a] == '='
442 || input[a] == '+' || input[a] == '-'
443 || input[a] == '*' || input[a] == '/'
444 || input[a] == '&' || input[a] == '|'))
445 if (input[a+1] == '=')
446 { result.push_back(input.substr(a, 2)); a += 2; continue; }
447 if (a+1 < b && (input[a] == ':' && input[a+1] == ':'))
448 { result.push_back(input.substr(a, 2)); a += 2; continue; }
449 if (!defmode.Active && input[a] == '#')
450 {
451 if (input.substr(a,8).GetString() == "#include")
452 {
453 size_t p = a;
454 while(p < b && input[p] != '\n') ++p;
455 result.push_back( input.substr(a, p-a) );
456 result.back().meta.preproc = true;
457 result.push_back( token("\n") );
458 a = p;
459 continue;
460 }
461 if (input.substr(a,7).GetString() == "#define")
462 {
463 size_t p = a+7;
464 while(p < b && std::isspace(input[p])) ++p; // skip blank
465 size_t term_begin = p;
466 while(p < b && isnamechar(input[p])) ++p; // skip term
467 if (input[p] != '(')
468 {
469 defmode.Activate();
470 std::string def = input.substr(a, p-a).GetString();
471 if(input[p] != '\n') def += ' ';
472 result.push_back(def); /* #define, term name and a space */
473 a = p;
474 continue;
475 }
476 else
477 {
478 std::string macro(input.GetString(), term_begin, p-term_begin);
479 if(parametric_macro_list.find(macro)
480 == parametric_macro_list.end())
481 {
482 if(verbose) std::cerr << "Detected parametric macro: " << macro << " (ok)\n";
483 parametric_macro_list.insert(macro);
484 }
485
486 size_t /*param_list_begin = p,*/ param_begin = p+1;
487 int balance = 1;
488 for (++p; true; ++p)
489 {
490 if(input[p]=='(') ++balance;
491 if(input[p]==',' || input[p]==')')
492 {
493 std::string paramname = input.substr(param_begin,p-param_begin).GetString();
494 //std::cerr << "Param name<" << paramname << ">\n";
495 defmode.ParamList.insert(paramname);
496 param_begin = p+1;
497 }
498 if(input[p]==')') { if(--balance == 0) { ++p; break; } }
499 }
500 size_t param_list_end = p;
501 while(input[p] != '\n' && std::isspace(input[p])) ++p;
502
503 defmode.Activate();
504 std::string def = input.substr(a, param_list_end-a).GetString();
505 if(input[p] != '\n') def += ' ';
506 result.push_back(def); /* #define, term name, params and a space */
507 a = p;
508 continue;
509 }
510 }
511 size_t preproc_begin = a;
512 bool in_quotes = false;
513 while(++a < b)
514 {
515 if (!in_quotes && input[a]=='"')
516 { in_quotes=true; continue; }
517 if (in_quotes && input[a]=='"' && input[a-1]!='\\')
518 { in_quotes=false; continue; }
519 if (input[a]=='\\' && input[a+1]=='\n') { ++a; continue; }
520 if (input[a]=='\n') break;
521 }
522 std::string stmt(input.GetString(), preproc_begin, a-preproc_begin);
523 if (stmt.substr(0,5) != "#line")
524 result.push_back(stmt);
525 result.push_back( token("\n") );
526 continue;
527 }
528 if (input[a] == '"')
529 {
530 size_t string_begin = a;
531 while(++a < b)
532 if (input[a]=='"' &&
533 (input[a-1] != '\\'
534 || input[a-2]=='\\')) { ++a; break; }
535 if(input.GetString() == "\"\"") continue; // Don't add an empty string token
536
537 std::string stringconst(input.GetString(), string_begin+1, a-string_begin-2);
538 if (SplitMacros)
539 while (true)
540 {
541 size_t p = stringconst.find_first_of(" ,+-", 0, 1);
542 if(p == stringconst.npos) break;
543 if(p > 0)
544 result.push_back( "\""+std::string(stringconst,0,p)+"\"" );
545 result.push_back( "\""+std::string(stringconst,p,1)+"\"" );
546 stringconst.erase(0, p+1);
547 }
548 if(!stringconst.empty()) result.push_back("\""+stringconst+"\"");
549 continue;
550 }
551 if (input[a] == '\'')
552 {
553 size_t char_begin = a; a += 3;
554 if (input[a-2] == '\\') ++a;
555 result.push_back( std::string(input.GetString(), char_begin, a-char_begin) );
556 continue;
557 }
558
559 result.push_back( input.substr(a++, 1) );
560 }
561 return result;
562 }
563
564 static const char cbuf[] =
565 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
566
567 unsigned macro_counter = 0;
568
GenerateMacroName()569 std::string GenerateMacroName()
570 {
571 std::string result;
572 unsigned p = macro_counter++;
573 result += macro_prefix_chars[(p/36)%macro_prefix_chars.size()];
574 result += cbuf[p%36];
575 p /= unsigned(macro_prefix_chars.size()*36); // 0-9A-Z
576 for(; p != 0; p /= 63)
577 result += cbuf[p%63];
578 return result;
579 }
580
CompressWithNonparametricMacros(std::vector<token> & tokens,const std::string & seq_name_buf)581 bool CompressWithNonparametricMacros
582 (std::vector<token>& tokens,
583 const std::string& seq_name_buf)
584 {
585 size_t seq_name_length = seq_name_buf.size();
586
587 /* Find a sub-sequence of tokens for which
588 * the occurrence-count times total length is
589 * largest and the balance of parentheses is even.
590 */
591 std::map<unsigned, length_rec> hash_results;
592 long best_score=0;
593 //size_t best_score_length=0;
594 unsigned best_hash=0;
595
596 if(verbose) std::cerr << tokens.size() << " tokens\n";
597
598 std::vector<bool> donttest(tokens.size(), false);
599
600 const size_t lookahead_depth = (140*100000) / tokens.size();
601 // ^Lookahead limit. The value chosen is an arbitrary value
602 // to shield against exponential runtime. A larger value
603 // yields better compression, but is slower.
604
605 for(size_t a=0; a<tokens.size(); ++a)
606 {
607 if (donttest[a]) continue;
608
609 //std::cerr << a << '\t' << best_score << '\t' << best_score_length << '\r' << std::flush;
610 size_t cap = a+lookahead_depth;
611 for(size_t b=a+1; b<tokens.size() && b<cap; ++b)
612 {
613 size_t max_match_len = std::min(tokens.size()-b, b-a);
614 size_t match_len = 0;
615 unsigned hash = 0;
616 //int balance = 0;
617
618 while(match_len < max_match_len
619 && tokens[a+match_len] == tokens[b+match_len]
620 && tokens[a+match_len].meta.preproc == false)
621 {
622 const token& word = tokens[a+match_len];
623
624 //balance += word.meta.balance;
625 //if (preserve_params
626 // && (balance < 0 || (word.meta.comma && balance==0))) break;
627
628 ++match_len;
629 hash = ((hash << (1+0*(match_len&31)))
630 ^ (hash >> (31-0*(match_len&31))))
631 ^ word.meta.hash;
632 //hash = ~hash*0x8088405u + word.meta.hash;
633
634 donttest[b] = true;
635
636 //if (!balance == 0)
637 {
638 std::map<unsigned, length_rec>::iterator i
639 = hash_results.lower_bound(hash);
640 if (i == hash_results.end() || i->first != hash)
641 {
642 length_rec rec;
643 rec.begin_index = (unsigned)a;
644 rec.num_tokens = (unsigned)match_len;
645 rec.num_occurrences = 1;
646 hash_results.insert(i, std::make_pair(hash,rec));
647 cap = std::max(cap, b+match_len+lookahead_depth);
648 }
649 else if (i->second.begin_index == (unsigned)a)
650 {
651 if (std::equal(
652 tokens.begin()+a, tokens.begin()+a+match_len,
653 tokens.begin() + i->second.begin_index))
654 {
655 long string_len = GetSeq(tokens.begin()+a, match_len, false).size();
656 long n = (i->second.num_occurrences += 1);
657 long define_length = seq_name_length + 9 - long(string_len);
658 long replace_length = long(string_len) - (long(seq_name_length)+1);
659 long score = replace_length * n - define_length;
660 if (score > best_score)
661 {
662 best_score = score;
663 //best_score_length = string_len;
664 best_hash = hash;
665 }
666 }
667 cap = std::max(cap, b+match_len+lookahead_depth);
668 }
669 }
670 }
671 }
672 }
673 if (best_score > 0)
674 {
675 const length_rec& rec = hash_results[best_hash];
676 if (rec.num_occurrences > 0)
677 {
678 /* Found a practical saving */
679 std::vector<token> sequence
680 (tokens.begin()+rec.begin_index,
681 tokens.begin()+rec.begin_index+rec.num_tokens);
682 if(verbose) std::cerr << "#define " << seq_name_buf << " " <<
683 GetSeq(sequence.begin(), sequence.size(), false).GetString()
684 //<< " /* " << rec.num_occurrences
685 //<< " occurrences */"
686 << std::endl;
687
688 /* Replace all occurrences of the sequence with the sequence name */
689 std::vector<bool> deletemap(tokens.size(), false);
690 for(size_t a=0;//rec.begin_index+rec.num_tokens;
691 a+rec.num_tokens<=tokens.size();
692 ++a)
693 {
694 if (std::equal(sequence.begin(),
695 sequence.end(),
696 tokens.begin()+a))
697 {
698 tokens[a] = seq_name_buf;
699 for(size_t b=1; b<rec.num_tokens; ++b)
700 deletemap[++a] = true;
701 }
702 }
703 size_t tgt=0, src=0;
704 for(; src < tokens.size(); ++src)
705 if (!deletemap[src])
706 tokens[tgt++].swap(tokens[src]);
707 tokens.erase(tokens.begin()+tgt, tokens.end());
708
709 sequence.insert(sequence.begin(),
710 token("#define " + seq_name_buf + " "));
711 sequence.push_back( token("\n") );
712
713 tokens.insert(tokens.begin(),
714 sequence.begin(), sequence.end());
715
716 /* Find more repetitions */
717 return true;
718 }
719 }
720 return false;
721 }
722
CompressWithParametricMacros(std::vector<token> &,const std::string &)723 bool CompressWithParametricMacros(
724 std::vector<token>& /*tokens*/,
725 const std::string& /*seq_name_buf*/)
726 {
727 /* TODO: Someone invent an algorithm for this?
728 *
729 * Find e.g.
730 *
731 * Cmpeq_i qI x == y ; } q1
732 * Cmpge_d qI x >= y ; } q1
733 * Cmpge_i qI x >= y ; } q1
734 * Cmpgt_d qI x > y ; } q1
735 * Cmpgt_i qI x > y ; } q1
736 * Cmple_d qI x <= y ; } q1
737 * Cmple_i qI x <= y ; } q1
738 * Cmplt_d qI x < y ; } q1
739 * Cmplt_i qI x < y ; } q1
740 * Cmpne_d qI x != y ; } q1
741 * Cmpne_i qI x != y ; } q1
742 *
743 * Substitute with:
744 *
745 * #define Zzz(A,B) A qI x B y ; } q1
746 * Zzz(Cmpeq_i,==)
747 * Zzz(Cmpge_d,>=)
748 * Zzz(Cmpge_i,>=)
749 * Zzz(Cmpgt_d,>)
750 * Zzz(Cmpgt_i,>)
751 * Zzz(Cmple_d,<=)
752 * Zzz(Cmple_i,<=)
753 * Zzz(Cmplt_d,<)
754 * Zzz(Cmplt_i,<)
755 * Zzz(Cmpne_d,!=)
756 * Zzz(Cmpne_i,!=)
757 *
758 * Which can be further turned into (well, theoretically):
759 *
760 * #define Zzz(A,B) A qI x B y ; } q1
761 * #define Zxy(A,B) Zzz(Cmp##A##_d,B) Zzz(Cmp##A##_i,B)
762 * Zzz(Cmpeq_i,==)
763 * Zxy(ge,>=)
764 * Zxy(gt,>)
765 * Zxy(le,<=)
766 * Zxy(lt,<)
767 * Zxy(ne,!=)
768 */
769 return false;
770 }
771 }
772
Compress(const std::string & input)773 std::string CPPcompressor::Compress(const std::string& input)
774 {
775 FILE* fp = fopen("cpp_compress_disable", "r");
776 if(fp) { fclose(fp); return input; }
777
778 reset_parametric_macro_list();
779 macro_counter = 0;
780
781 DefineParsingMode defmode;
782 std::vector<token> tokens = Tokenize(input, defmode, false, false);
783
784 int tried_retoken_rounds = 0;
785 std::string seq_name_buf = GenerateMacroName();
786
787 //bool preserve_parens = false;
788
789 StringSeq result;
790 while (true)
791 {
792 if (CompressWithNonparametricMacros(tokens, seq_name_buf))
793 {
794 tried_retoken_rounds = 0;
795 seq_name_buf = GenerateMacroName();
796 }
797 else if (CompressWithParametricMacros(tokens, seq_name_buf))
798 {
799 tried_retoken_rounds = 0;
800 seq_name_buf = GenerateMacroName();
801 }
802 else
803 {
804 if (tried_retoken_rounds >= 4) break;
805 //preserve_parens = true;
806
807 if(verbose) std::cerr << "Retokenizing\n";
808 //static int counter=0; ++counter;
809 //if(counter>=1) {Debug=true;}
810 result = GetSeq(tokens.begin(), tokens.size(), true);
811 //if(counter>=1) break;
812
813 DefineParsingMode defmode;
814 tokens = Tokenize(result, defmode, tried_retoken_rounds&1, tried_retoken_rounds&2);
815 ++tried_retoken_rounds;
816 }
817 }
818 return result.GetString();
819 }
820
Compress(const std::string & input,const std::string & m)821 std::string CPPcompressor::Compress
822 (const std::string& input,
823 const std::string& m)
824 {
825 if(!m.empty()) macro_prefix_chars = m;
826 return Compress(input);
827 }
828