/*
 * Distributed under the Boost Software License, Version 1.0. (See accompanying
 * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
 *
 * See http://www.boost.org/libs/iostreams for documentation.
 *
 * File:        libs/iostreams/test/grep_test.cpp
 * Date:        Mon May 26 17:48:45 MDT 2008
 * Copyright:   2008 CodeRage, LLC
 * Author:      Jonathan Turkanis
 * Contact:     turkanis at coderage dot com
 *
 * Tests the class template basic_grep_filter.
 */

#include <iostream>

#include <boost/config.hpp>  // Make sure ptrdiff_t is in std.
#include <algorithm>
#include <cstddef>           // std::ptrdiff_t
#include <string>
#include <boost/iostreams/compose.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/filter/grep.hpp>
#include <boost/iostreams/filter/test.hpp>
#include <boost/ref.hpp>
#include <boost/regex.hpp>
#include <boost/test/test_tools.hpp>
#include <boost/test/unit_test.hpp>

using namespace boost;
using namespace boost::iostreams;
namespace io = boost::iostreams;
using boost::unit_test::test_suite;

// Test input: addresses of US Appeals Courts, from uscourts.gov.
// Each address is three lines; consecutive addresses are separated by an
// empty line, and every line (including the last) ends in '\n'.
std::string addresses =
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n";

// Expected output: the lines of the address list that contain
// "United States Courthouse".
std::string us_courthouse =
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n";

// Expected output: the lines of the address list that do NOT contain
// "United States Courthouse" (empty separator lines included).
std::string us_courthouse_inv =
    "One Courthouse Way\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "40 Centre Street\n"
    "New York, NY 10007-1501\n"
    "\n"
    "601 Market Street\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "515 Rusk Street\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n";

// Expected output: the lines of the address list that contain a two-letter
// state abbreviation followed by a ZIP code (five digits, optionally
// followed by a hyphen and four more digits).
std::string state_and_zip =
    "Boston, MA 02210-3002\n"
    "New York, NY 10007-1501\n"
    "Philadelphia, PA 19106-1729\n"
    "Richmond, VA 23219-3525\n"
    "New Orleans, LA 70130\n"
    "Houston, TX 77002-2600\n"
    "Cincinnati, OH 45202\n"
    "Chicago, IL 60604\n";

// Expected output: the lines of the address list that do NOT contain a
// state abbreviation and ZIP code (empty separator lines included).
std::string state_and_zip_inv =
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n";

// Expected output: the lines of the address list that contain three
// consecutive words separated only by whitespace. Lines such as
// "New York, NY 10007-1501" are absent because punctuation interrupts
// every run of three words.
std::string three_words =
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "One Courthouse Way\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "40 Centre Street\n"
    "21400 James A. Byrne United States Courthouse\n"
    "601 Market Street\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "515 Rusk Street\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n";

// Expected output: the lines of the address list that consist of exactly
// three words.
std::string exactly_three_words =
    "One Courthouse Way\n"
    "40 Centre Street\n"
    "601 Market Street\n"
    "515 Rusk Street\n";

// Expected output: the lines of the address list that do NOT consist of
// exactly three words (empty separator lines included).
std::string exactly_three_words_inv =
    "John Joseph Moakley United States Courthouse, Suite 2500\n"
    "Boston, MA 02210-3002\n"
    "\n"
    "Thurgood Marshall United States Courthouse, 18th Floor\n"
    "New York, NY 10007-1501\n"
    "\n"
    "21400 James A. Byrne United States Courthouse\n"
    "Philadelphia, PA 19106-1729\n"
    "\n"
    "Lewis F. Powell, Jr. United States Courthouse Annex, Suite 501\n"
    "1100 East Main Street\n"
    "Richmond, VA 23219-3525\n"
    "\n"
    "F. Edward Hebert Federal Bldg\n"
    "600 South Maestri Place\n"
    "New Orleans, LA 70130\n"
    "\n"
    "Bob Casey United States Courthouse, 1st Floor\n"
    "Houston, TX 77002-2600\n"
    "\n"
    "Potter Stewart United States Courthouse, Suite 540\n"
    "100 East Fifth Street\n"
    "Cincinnati, OH 45202\n"
    "\n"
    "2722 Everett McKinley Dirksen United States Courthouse\n"
    "219 South Dearborn Street\n"
    "Chicago, IL 60604\n";

202 void test_filter( grep_filter grep,
203                   const std::string& input,
204                   const std::string& output );
205 
grep_filter_test()206 void grep_filter_test()
207 {
208     regex match_us_courthouse("\\bUnited States Courthouse\\b");
209     regex match_state_and_zip("\\b[A-Z]{2}\\s+[0-9]{5}(-[0-9]{4})?\\b");
210     regex match_three_words("\\b\\w+\\s+\\w+\\s+\\w+\\b");
211     regex_constants::match_flag_type match_default =
212         regex_constants::match_default;
213 
214     {
215         grep_filter grep(match_us_courthouse);
216         test_filter(grep, addresses, us_courthouse);
217     }
218 
219     {
220         grep_filter grep(match_us_courthouse, match_default, grep::invert);
221         test_filter(grep, addresses, us_courthouse_inv);
222     }
223 
224     {
225         grep_filter grep(match_state_and_zip);
226         test_filter(grep, addresses, state_and_zip);
227     }
228 
229     {
230         grep_filter grep(match_state_and_zip, match_default, grep::invert);
231         test_filter(grep, addresses, state_and_zip_inv);
232     }
233 
234     {
235         grep_filter grep(match_three_words);
236         test_filter(grep, addresses, three_words);
237     }
238 
239     {
240         grep_filter grep(match_three_words, match_default, grep::whole_line);
241         test_filter(grep, addresses, exactly_three_words);
242     }
243 
244     {
245         int options = grep::whole_line | grep::invert;
246         grep_filter grep(match_three_words, match_default, options);
247         test_filter(grep, addresses, exactly_three_words_inv);
248     }
249 }
250 
test_filter(grep_filter grep,const std::string & input,const std::string & output)251 void test_filter( grep_filter grep,
252                   const std::string& input,
253                   const std::string& output )
254 {
255     // Count lines in output
256     std::ptrdiff_t count = std::count(output.begin(), output.end(), '\n');
257 
258     // Test as input filter
259     {
260         array_source  src(input.data(), input.data() + input.size());
261         std::string   dest;
262         io::copy(compose(boost::ref(grep), src), io::back_inserter(dest));
263         BOOST_CHECK(dest == output);
264         BOOST_CHECK(grep.count() == count);
265     }
266 
267     // Test as output filter
268     {
269         array_source  src(input.data(), input.data() + input.size());
270         std::string   dest;
271         io::copy(src, compose(boost::ref(grep), io::back_inserter(dest)));
272         BOOST_CHECK(dest == output);
273         BOOST_CHECK(grep.count() == count);
274     }
275 }
276 
init_unit_test_suite(int,char * [])277 test_suite* init_unit_test_suite(int, char* [])
278 {
279     test_suite* test = BOOST_TEST_SUITE("grep_filter test");
280     test->add(BOOST_TEST_CASE(&grep_filter_test));
281     return test;
282 }
283