1 /** @file valuerangeproc.cc
2  * @brief Standard ValueRangeProcessor subclass implementations
3  */
4 /* Copyright (C) 2007,2008,2009,2010,2012 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include <xapian/queryparser.h>
24 
25 #include <cstdio> // For sprintf().
26 #include <cstdlib> // For atoi().
27 #include "safeerrno.h"
28 
29 #include <string>
30 #include "stringutils.h"
31 
32 using namespace std;
33 
34 namespace Xapian {
35 
36 Xapian::valueno
operator ()(string & begin,string & end)37 StringValueRangeProcessor::operator()(string &begin, string &end)
38 {
39     if (str.size()) {
40 	if (prefix) {
41 	    // If there's a prefix, require it on the start of the range.
42 	    if (!startswith(begin, str)) {
43 		// Prefix not given.
44 		return Xapian::BAD_VALUENO;
45 	    }
46 	    begin.erase(0, str.size());
47 	    // But it's optional on the end of the range, e.g. $10..50
48 	    if (startswith(end, str)) {
49 		end.erase(0, str.size());
50 	    }
51 	} else {
52 	    // If there's a suffix, require it on the end of the range.
53 	    if (!endswith(end, str)) {
54 		// Suffix not given.
55 		return Xapian::BAD_VALUENO;
56 	    }
57 	    end.resize(end.size() - str.size());
58 	    // But it's optional on the start of the range, e.g. 10..50kg
59 	    if (endswith(begin, str)) {
60 		begin.resize(begin.size() - str.size());
61 	    }
62 	}
63     }
64     return valno;
65 }
66 
/** Split a date written as two short numeric fields and a year.
 *
 *  The accepted form is X1<sep>X2<sep>Y where X1 and X2 are one or two
 *  digits each, Y is one to four digits, and each separator is '/', '-' or
 *  '.' (the two separators need not be the same character).  No decision is
 *  made here about which of X1 and X2 is the day and which is the month, so
 *  both are only checked to lie in the range 1 to 31.
 *
 *  An empty string is accepted and reported by setting all three outputs
 *  to -1.
 *
 *  @param s   The string to decode.
 *  @param x1  Set to the first numeric field.
 *  @param x2  Set to the second numeric field.
 *  @param y   Set to the year field exactly as written (no century is added).
 *
 *  @return  true if @a s was decoded; false otherwise, in which case
 *	     @a x1, @a x2 and @a y are left unmodified.
 */
static bool
decode_xxy(const std::string & s, int & x1, int &x2, int &y)
{
    if (s.size() == 0) {
	x1 = x2 = y = -1;
	return true;
    }
    if (s.size() < 5 || s.size() > 10) return false;

    // First field: 1 or 2 digits followed by a separator.  If there is no
    // non-digit at all then i is npos, which the "i > 2" test rejects.
    const size_t i = s.find_first_not_of("0123456789");
    if (i < 1 || i > 2 || !(s[i] == '/' || s[i] == '-' || s[i] == '.'))
	return false;

    // Second field: likewise.  A value of npos for j makes the unsigned
    // difference huge, so it is rejected by the "> 2" test.
    const size_t j = s.find_first_not_of("0123456789", i + 1);
    if (j - (i + 1) < 1 || j - (i + 1) > 2 ||
	!(s[j] == '/' || s[j] == '-' || s[j] == '.'))
	return false;

    // Year: between 1 and 4 digits.  The lower bound matters: without it a
    // string such as "01/02/" would be accepted with atoi("") giving year 0.
    const size_t year_len = s.size() - (j + 1);
    if (year_len < 1 || year_len > 4) return false;
    if (s.find_first_not_of("0123456789", j + 1) != std::string::npos)
	return false;

    // Decode into locals so the outputs are only written on success.
    const int first = std::atoi(s.c_str());
    if (first < 1 || first > 31) return false;
    const int second = std::atoi(s.c_str() + i + 1);
    if (second < 1 || second > 31) return false;

    x1 = first;
    x2 = second;
    y = std::atoi(s.c_str() + j + 1);
    return true;
}
92 
// Largest day number allowed in each month, indexed by (month - 1).
//
// This table only helps to decide whether an ambiguous aa/bb/cc date could be
// read in a particular format, so February is simply given 29 days whatever
// the year.  The check which really matters is that the month field is <= 12;
// testing that the day is <= 31 would be nearly as good.
static const char max_month_length[12] = {
    31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};

// Report whether day d and month m could form a date.
//
// A month of -1 is always accepted; decode_xxy() produces that value for an
// empty string.
static bool
vet_dm(int d, int m)
{
    if (m == -1) return true;
    // The month must be checked first so that the table lookup stays within
    // bounds; && guarantees the lookup is skipped otherwise.
    const bool month_ok = (m >= 1 && m <= 12);
    return month_ok && d >= 1 && d <= max_month_length[m - 1];
}
109 
// Test whether s looks like YYYY-MM-DD, where both separators are the same
// character and that character is '-', '.' or '/'.
//
// NB The caller is expected to have checked that s has length 10 already;
// the length is not tested here.
static bool
is_yyyy_mm_dd(const std::string &s)
{
    static const char digits[] = "0123456789";
    // The only non-digits allowed are at offsets 4 and 7.
    if (s.find_first_not_of(digits) != 4) return false;
    if (s.find_first_not_of(digits, 5) != 7) return false;
    if (s.find_first_not_of(digits, 8) != std::string::npos) return false;
    // Both separators must match, and be one of the recognised characters.
    const char sep = s[4];
    if (s[7] != sep) return false;
    return sep == '-' || sep == '.' || sep == '/';
}
120 
121 Xapian::valueno
operator ()(string & begin,string & end)122 DateValueRangeProcessor::operator()(string &begin, string &end)
123 {
124     if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
125 	return BAD_VALUENO;
126 
127     if ((begin.size() == 8 || begin.size() == 0) &&
128 	(end.size() == 8 || end.size() == 0) &&
129 	begin.find_first_not_of("0123456789") == string::npos &&
130 	end.find_first_not_of("0123456789") == string::npos) {
131 	// YYYYMMDD
132 	return valno;
133     }
134     if ((begin.size() == 10 || begin.size() == 0) &&
135 	(end.size() == 10 || end.size() == 0)) {
136 	if ((begin.empty() || is_yyyy_mm_dd(begin)) &&
137 	    (end.empty() || is_yyyy_mm_dd(end))) {
138 	    // YYYY-MM-DD
139 	    if (!begin.empty()) {
140 		begin.erase(7, 1);
141 		begin.erase(4, 1);
142 	    }
143 	    if (!end.empty()) {
144 		end.erase(7, 1);
145 		end.erase(4, 1);
146 	    }
147 	    return valno;
148 	}
149     }
150 
151     int b_d, b_m, b_y;
152     int e_d, e_m, e_y;
153     if (!decode_xxy(begin, b_d, b_m, b_y) || !decode_xxy(end, e_d, e_m, e_y))
154 	return Xapian::BAD_VALUENO;
155 
156     // Check that the month and day are within range.  Also assume "start" <=
157     // "end" to help decide ambiguous cases.
158     if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
159 	(b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
160 	// OK.
161     } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) &&
162 	(b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) {
163 	swap(b_m, b_d);
164 	swap(e_m, e_d);
165     } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) &&
166 	       (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) {
167 	// OK.
168     } else {
169 	return Xapian::BAD_VALUENO;
170     }
171 
172     if (b_y < 100) {
173 	b_y += 1900;
174 	if (b_y < epoch_year) b_y += 100;
175     }
176     if (e_y < 100) {
177 	e_y += 1900;
178 	if (e_y < epoch_year) e_y += 100;
179     }
180 
181 #ifdef SNPRINTF
182     char buf[9];
183     if (!begin.empty()) {
184 	SNPRINTF(buf, sizeof(buf), "%08d", b_y * 10000 + b_m * 100 + b_d);
185 	begin.assign(buf, 8);
186     }
187     if (!end.empty()) {
188 	SNPRINTF(buf, sizeof(buf), "%08d", e_y * 10000 + e_m * 100 + e_d);
189 	end.assign(buf, 8);
190     }
191 #else
192     char buf[100];
193     buf[sizeof(buf) - 1] = '\0';
194     if (!begin.empty()) {
195 	sprintf(buf, "%08d", b_y * 10000 + b_m * 100 + b_d);
196 	if (buf[sizeof(buf) - 1]) abort(); // Buffer overrun!
197 	begin.assign(buf, 8);
198     }
199     if (!end.empty()) {
200 	sprintf(buf, "%08d", e_y * 10000 + e_m * 100 + e_d);
201 	if (buf[sizeof(buf) - 1]) abort(); // Buffer overrun!
202 	end.assign(buf, 8);
203     }
204 #endif
205     return valno;
206 }
207 
208 Xapian::valueno
operator ()(string & begin,string & end)209 NumberValueRangeProcessor::operator()(string &begin, string &end)
210 {
211     if (StringValueRangeProcessor::operator()(begin, end) == BAD_VALUENO)
212 	return BAD_VALUENO;
213 
214     // Parse the numbers to floating point.
215     double beginnum;
216 
217     if (!begin.empty()) {
218 	errno = 0;
219 	const char * startptr = begin.c_str();
220 	char * endptr;
221 	beginnum = strtod(startptr, &endptr);
222 	if (endptr != startptr + begin.size())
223 	    // Invalid characters in string
224 	    return Xapian::BAD_VALUENO;
225 	if (errno)
226 	    // Overflow or underflow
227 	    return Xapian::BAD_VALUENO;
228     } else {
229 	// Silence GCC warning.
230 	beginnum = 0.0;
231     }
232 
233     if (!end.empty()) {
234 	errno = 0;
235 	const char * startptr = end.c_str();
236 	char * endptr;
237 	double endnum = strtod(startptr, &endptr);
238 	if (endptr != startptr + end.size())
239 	    // Invalid characters in string
240 	    return Xapian::BAD_VALUENO;
241 	if (errno)
242 	    // Overflow or underflow
243 	    return Xapian::BAD_VALUENO;
244 	end.assign(Xapian::sortable_serialise(endnum));
245     }
246 
247     if (!begin.empty()) {
248 	begin.assign(Xapian::sortable_serialise(beginnum));
249     }
250 
251     return valno;
252 }
253 
254 }
255