1 ///////////////////////////////////////////////////////////////
2 //  Copyright 2015 John Maddock. Distributed under the Boost
3 //  Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 
7 #include "performance.hpp"
8 #include <list>
9 #include <fstream>
10 #include <iostream>
11 #include <iomanip>
12 #include <boost/chrono.hpp>
13 #include <boost/detail/lightweight_main.hpp>
14 #include <boost/regex.hpp>
15 #include <boost/filesystem.hpp>
16 
//
// Replaces the contents of "text" with the complete contents of the
// file named by "file".
//
// text: receives the file contents, anything it held before is discarded.
// file: path of the file to read.
//
// Throws std::runtime_error if the file cannot be opened.
//
void load_file(std::string& text, const char* file)
{
   std::ifstream is(file);
   if(!is.good())
   {
      std::string msg("Unable to open file: \"");
      msg.append(file);
      msg.append("\"");
      throw std::runtime_error(msg);
   }
   // Seek to the end to discover how much data there is, then rewind:
   is.seekg(0, std::ios_base::end);
   const std::istream::pos_type pos = is.tellg();
   is.seekg(0, std::ios_base::beg);
   text.erase();
   // tellg() signals failure with pos_type(-1); converting that to a size
   // would ask for an impossible allocation, so the position is only used
   // as a capacity hint when it is valid:
   if(pos != std::istream::pos_type(-1))
      text.reserve(static_cast<std::string::size_type>(static_cast<std::streamoff>(pos)));
   text.assign(std::istreambuf_iterator<char>(is), std::istreambuf_iterator<char>());
}
36 
37 
38 typedef std::list<boost::shared_ptr<abstract_regex> > list_type;
39 
engines()40 list_type& engines()
41 {
42    static list_type l;
43    return l;
44 }
45 
register_instance(boost::shared_ptr<abstract_regex> item)46 void abstract_regex::register_instance(boost::shared_ptr<abstract_regex> item)
47 {
48    engines().push_back(item);
49 }
50 
//
// Simple interval timer built on top of a chrono-style clock.
//
// Clock must provide the nested types "duration" and "time_point"
// and a static now() function returning a time_point.
//
template <class Clock>
struct stopwatch
{
   typedef typename Clock::duration duration;

   // Starts timing from the moment of construction.
   stopwatch() : m_start(Clock::now())
   {
   }

   // Returns the time elapsed since construction or the last reset().
   // Does not modify the stopwatch, so may be called on a const object.
   duration elapsed() const
   {
      return Clock::now() - m_start;
   }

   // Restarts timing from the current time.
   void reset()
   {
      m_start = Clock::now();
   }

private:
   typename Clock::time_point m_start;  // when timing (re)started
};
71 
72 unsigned sum = 0;
73 unsigned last_value_returned = 0;
74 
75 template <class Func>
exec_timed_test(Func f)76 double exec_timed_test(Func f)
77 {
78    double t = 0;
79    unsigned repeats = 1;
80    do {
81       stopwatch<boost::chrono::high_resolution_clock> w;
82 
83       for(unsigned count = 0; count < repeats; ++count)
84       {
85          last_value_returned  = f();
86          sum += last_value_returned;
87       }
88 
89       t = boost::chrono::duration_cast<boost::chrono::duration<double>>(w.elapsed()).count();
90       if(t < 0.5)
91          repeats *= 2;
92    } while(t < 0.5);
93    return t / repeats;
94 }
95 
96 
format_expression_as_quickbook(std::string s)97 std::string format_expression_as_quickbook(std::string s)
98 {
99    static const boost::regex e("[`/_*=$^@#&%\\\\]");
100    static const boost::regex open_b("\\[");
101    static const boost::regex close_b("\\]");
102    s = regex_replace(s, e, "\\\\$0");
103    s = regex_replace(s, open_b, "\\\\u005B");
104    s = regex_replace(s, close_b, "\\\\u005D");
105    if(s.size() > 200)
106    {
107       s.erase(200);
108       s += " ...";
109    }
110    return "[^" + s + "]";
111 }
112 
test_match(const char * expression,const char * text,bool isperl=false)113 void test_match(const char* expression, const char* text, bool isperl = false)
114 {
115    std::string table = "Testing simple " + (isperl ? std::string("Perl") : std::string("leftmost-longest")) + " matches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
116    std::string row = format_expression_as_quickbook(expression);
117    row += "[br]";
118    row += format_expression_as_quickbook(text);
119    for(list_type::const_iterator i = engines().begin(); i != engines().end(); ++i)
120    {
121       std::string heading = (*i)->name();
122       if((*i)->set_expression(expression, isperl))
123       {
124          double time = exec_timed_test([&]() { return (*i)->match_test(text) ? 1 : 0; });
125          report_execution_time(time, table, row, heading);
126       }
127    }
128 }
129 
test_search(const char * expression,const char * text,bool isperl=false,const char * filename=0)130 void test_search(const char* expression, const char* text, bool isperl = false, const char* filename = 0)
131 {
132    std::string table = "Testing " + (isperl ? std::string("Perl") : std::string("leftmost-longest")) + " searches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
133    std::string row = format_expression_as_quickbook(expression);
134    row += "[br]";
135    if(filename)
136    {
137       row += "In file: ";
138       row += filename;
139    }
140    else
141    {
142       row += format_expression_as_quickbook(text);
143    }
144    for(list_type::const_iterator i = engines().begin(); i != engines().end(); ++i)
145    {
146       std::string heading = (*i)->name();
147       if((*i)->set_expression(expression, isperl))
148       {
149          double time = exec_timed_test([&]() { return (*i)->find_all(text); });
150          report_execution_time(time, table, row, heading);
151          std::cout << "Search with library: " << heading << " found " << last_value_returned << " occurances.\n";
152       }
153    }
154 }
155 
cpp_main(int argc,char * argv[])156 int cpp_main(int argc, char* argv[])
157 {
158    boost::filesystem::path here(__FILE__);
159    here = here.parent_path().parent_path().parent_path().parent_path();
160 
161    boost::filesystem::path cpp_file = here / "boost";
162    cpp_file /= "crc.hpp";
163 
164    // start with a simple test, this is basically a measure of the minimal overhead
165    // involved in calling a regex matcher:
166    test_match("abc", "abc");
167    // these are from the regex docs:
168    test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
169    test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
170    // these are from http://www.regxlib.com/
171    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk");
172    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
173    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
174    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
175    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
176    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
177    test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
178    test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
179    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
180    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
181    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
182 
183    // start with a simple test, this is basically a measure of the minimal overhead
184    // involved in calling a regex matcher:
185    test_match("abc", "abc", true);
186    // these are from the regex docs:
187    test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string", true);
188    test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456", true);
189    // these are from http://www.regxlib.com/
190    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk", true);
191    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu", true);
192    test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv", true);
193    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ", true);
194    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA", true);
195    test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ", true);
196    test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001", true);
197    test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001", true);
198    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123", true);
199    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159", true);
200    test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159", true);
201 
202    std::string file_contents;
203 
204    const char* highlight_expression = // preprocessor directives: index 1
205       "(^[ \\t]*#(?:(?>[^\\\\\\n]+)|\\\\(?>\\s*\\n|.))*)|";
206       // comment: index 2
207       "(//[^\\n]*|/\\*.*?\\*/)|"
208       // literals: index 3
209       "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
210       // string literals: index 4
211       "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
212       // keywords: index 5
213       "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
214       "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
215       "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
216       "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
217       "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
218       "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
219       "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
220       "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
221       "|using|virtual|void|volatile|wchar_t|while)\\>"
222       ;
223    const char* class_expression = "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
224       "(class|struct)[[:space:]]*(\\w+([ \t]*\\([^)]*\\))?"
225       "[[:space:]]*)*(\\w*)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
226       "(\\{|:[^;\\{()]*\\{)";
227    const char* call_expression = "\\w+\\s*(\\([^()]++(?:(?1)[^()]++)*+[^)]*\\))";
228 
229    const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
230    const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
231    const char* brace_expression = "\\{[^{}]++((?0)[^{}]++)*+[^}]*+\\}";
232    const char* function_with_body_expression = "(\\w+)\\s*(\\([^()]++(?:(?2)[^()]++)*+[^)]*\\))\\s*(\\{[^{}]++((?3)[^{}]++)*+[^}]*+\\})";
233 
234 
235    load_file(file_contents, "../../../libs/libraries.htm");
236    test_search("Beman|John|Dave", file_contents.c_str(), false, "../../../libs/libraries.htm");
237    test_search("Beman|John|Dave", file_contents.c_str(), true, "../../../libs/libraries.htm");
238    test_search("(?i)<p>.*?</p>", file_contents.c_str(), true, "../../../libs/libraries.htm");
239    test_search("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), false, "../../../libs/libraries.htm");
240    test_search("(?i)<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), true, "../../../libs/libraries.htm");
241    test_search("(?i)<h[12345678][^>]*>.*?</h[12345678]>", file_contents.c_str(), true, "../../../libs/libraries.htm");
242    test_search("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), false, "../../../libs/libraries.htm");
243    test_search("(?i)<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents.c_str(), true, "../../../libs/libraries.htm");
244    test_search("(?i)<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents.c_str(), true, "../../../libs/libraries.htm");
245 
246 
247    load_file(file_contents, "../../../boost/multiprecision/number.hpp");
248 
249    test_search(function_with_body_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
250    test_search(brace_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
251    test_search(call_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
252    test_search(highlight_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
253    test_search(class_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
254    test_search(include_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
255    test_search(boost_include_expression, file_contents.c_str(), true, "boost/multiprecision/number.hpp");
256 
257    return 0;
258 }
259 
260