1 /*
2 # Copyright (C) 2015 Fulvio Benini
3 
4 * This file is part of Scid (Shane's Chess Information Database).
5 *
6 * Scid is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation.
9 *
10 * Scid is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Scid. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include "common.h"
20 #include "misc.h"
21 #include "scidbase.h"
22 #include <algorithm>
23 #include <string>
24 #include <vector>
25 
26 namespace {
27 
28 class SearchName {
29 	const scidBaseT* base_;
30 	idNumberT (IndexEntry::* f1_) () const;
31 	idNumberT (IndexEntry::* f2_) () const;
32 	std::vector<bool> mask_;
33 
34 public:
SearchName(const scidBaseT * base,std::string pattern,nameT name_type,idNumberT (IndexEntry::* f1)()const,idNumberT (IndexEntry::* f2)()const=0)35 	SearchName(const scidBaseT* base,
36 	           std::string pattern,
37 	           nameT name_type,
38 	           idNumberT (IndexEntry::* f1) () const,
39 	           idNumberT (IndexEntry::* f2) () const = 0)
40 	: base_(base), f1_(f1), f2_(f2) {
41 		idNumberT n = base->getNameBase()->GetNumNames(name_type);
42 		mask_.resize(n);
43 
44 		size_t l = pattern.length();
45 		bool exact = (l > 2 && pattern[l -1] == '"' && pattern[0] == '"');
46 		if (exact) {
47 			Init_exact(n, name_type, pattern.substr(1, l -2));
48 		} else {
49 			Init_icase_ignoreSpaces(n, name_type, pattern.c_str());
50 		}
51 	}
52 
Init_exact(idNumberT n,nameT name_type,std::string pattern)53 	void Init_exact (idNumberT n, nameT name_type, std::string pattern) {
54 		const NameBase* nb = base_->getNameBase();
55 		for (idNumberT i=0; i < n; i++) {
56 			const char* name = nb->GetName (name_type, i);
57 			mask_[i] = (name == pattern);
58 		}
59 	}
60 
Init_icase_ignoreSpaces(idNumberT n,nameT name_type,const char * pattern)61 	void Init_icase_ignoreSpaces(idNumberT n, nameT name_type, const char* pattern) {
62 		const NameBase* nb = base_->getNameBase();
63 		for (idNumberT i=0; i < n; i++) {
64 			const char* name = nb->GetName (name_type, i);
65 			mask_[i] = strAlphaContains(name, pattern);
66 		}
67 	}
68 
operator ()(gamenumT gnum) const69 	bool operator() (gamenumT gnum) const {
70 		bool res = mask_[(base_->getIndexEntry(gnum)->*f1_)()];
71 		if (!res && f2_ != 0) {
72 			return mask_[(base_->getIndexEntry(gnum)->*f2_)()];
73 		}
74 		return res;
75 	}
76 };
77 
78 class SearchSiteCountry {
79 	const scidBaseT* base_;
80 	std::vector<bool> mask_;
81 
82 public:
SearchSiteCountry(const scidBaseT * base,std::string country)83 	SearchSiteCountry(const scidBaseT* base,
84 	                  std::string country)
85 	: base_(base) {
86 		idNumberT n = base->getNameBase()->GetNumNames(NAME_SITE);
87 		mask_.resize(n);
88 
89 		std::transform(country.begin(), country.end(), country.begin(), ::toupper);
90 		const NameBase* nb = base_->getNameBase();
91 		for (idNumberT i=0; i < n; i++) {
92 			const char* site = nb->GetName(NAME_SITE, i);
93 			const char* it = site;
94 			while (*it != 0) it++;
95 			if (std::distance(site, it) > 3) it -= 3;
96 			std::string tmp = it;
97 			std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::toupper);
98 			mask_[i] = (tmp == country);
99 		}
100 	}
101 
operator ()(gamenumT gnum) const102 	bool operator() (gamenumT gnum) const {
103 		return mask_[base_->getIndexEntry(gnum)->GetSite()];
104 	}
105 };
106 
107 class SearchFlag {
108 	const scidBaseT* base_;
109 	uint32_t flagMask_;
110 
111 public:
SearchFlag(const scidBaseT * base,const char * flags)112 	SearchFlag(const scidBaseT* base,
113 	           const char* flags)
114 	: base_(base) {
115 		flagMask_ = IndexEntry::StrToFlagMask(flags);
116 		ASSERT(flagMask_ != 0);
117 	}
118 
operator ()(gamenumT gnum) const119 	bool operator() (gamenumT gnum) const {
120 		return base_->getIndexEntry(gnum)->GetFlag(flagMask_);
121 	}
122 };
123 
124 class SearchResult {
125 	const scidBaseT* base_;
126 	bool result_[NUM_RESULT_TYPES];
127 
128 public:
SearchResult(const scidBaseT * base,const char * results)129 	SearchResult(const scidBaseT* base,
130 	             const char* results)
131 	: base_(base) {
132 		std::fill_n(result_, NUM_RESULT_TYPES, false);
133 		const char* end = RESULT_CHAR + NUM_RESULT_TYPES;
134 		while (*results != 0) {
135 			const char* it = std::find(RESULT_CHAR, end, *results);
136 			if (it != end) result_[std::distance(RESULT_CHAR, it)] = true;
137 			results++;
138 		}
139 	}
140 
operator ()(gamenumT gnum) const141 	bool operator() (gamenumT gnum) const {
142 		resultT r = base_->getIndexEntry(gnum)->GetResult();
143 		return result_[r];
144 	}
145 };
146 
147 template <typename T>
148 class SearchRange : public StrRange {
149 protected:
150 	const scidBaseT* base_;
151 	T (IndexEntry::* f_) () const;
152 
153 protected:
SearchRange(const scidBaseT * base,T (IndexEntry::* f)()const)154 	SearchRange(const scidBaseT* base,
155 	            T (IndexEntry::* f) () const)
156 	: base_(base), f_(f) {}
157 
158 public:
SearchRange(const scidBaseT * base,const char * range,T (IndexEntry::* f)()const)159 	SearchRange(const scidBaseT* base,
160 	            const char* range,
161 	            T (IndexEntry::* f) () const)
162 	: StrRange(range), base_(base), f_(f) {}
163 
operator ()(gamenumT gnum) const164 	bool operator() (gamenumT gnum) const {
165 		long v = (base_->getIndexEntry(gnum)->*f_)();
166 		return inRange(v);
167 	}
168 };
169 
170 class SearchRangeDate : public SearchRange<dateT> {
171 public:
SearchRangeDate(const scidBaseT * base,const char * range,dateT (IndexEntry::* f)()const)172 	SearchRangeDate(const scidBaseT* base,
173 	                const char* range,
174 	                dateT (IndexEntry::* f) () const)
175 	: SearchRange<dateT>(base, f) {
176 		// Extract two whitespace-separated dates:
177 		const char* v = strFirstWord(range);
178 		min_ = date_EncodeFromString (v);
179 		const char* next = strNextWord(v);
180 		max_ = (*next == 0) ? min_ : date_EncodeFromString (next);
181 		if (min_ > max_) std::swap(min_, max_);
182 	}
183 };
184 
185 class SearchRangeEco : public SearchRange<ecoT> {
186 public:
SearchRangeEco(const scidBaseT * base,const char * range,ecoT (IndexEntry::* f)()const)187 	SearchRangeEco(const scidBaseT* base,
188 	               const char* range,
189 	               ecoT (IndexEntry::* f) () const)
190 	: SearchRange<ecoT>(base, f) {
191 		// Extract two whitespace-separated ECO codes:
192 		const char* v = strFirstWord(range);
193 		min_ = eco_FromString(v);
194 		const char* next = strNextWord(v);
195 		max_ = (*next == 0) ? min_ : eco_FromString(next);
196 		if (min_ > max_) std::swap(min_, max_);
197 		// Set eco maximum to be the largest subcode, for example,
198 		// "B07" -> "B07z4" to make sure subcodes are included in the range:
199 		max_ = eco_LastSubCode(static_cast<ecoT>(max_));
200 	}
201 };
202 
203 class SearchRangeGamenum : public SearchRange<gamenumT> {
204 public:
SearchRangeGamenum(const scidBaseT * base,const char * range)205 	SearchRangeGamenum(const scidBaseT* base,
206 	                   const char* range)
207 	: SearchRange<gamenumT>(base, range, 0) {
208 		// Set up game number range:
209 		// Note that a negative number means a count from the end,
210 		// so -1 = last game, -2 = second to last, etc.
211 		if (min_ < 0) min_ += base->numGames();
212 		else min_ -=1;
213 		if (max_ < 0) max_ += base->numGames();
214 		else max_ -=1;
215 		if (min_ > max_) std::swap(min_, max_);
216 	}
217 
operator ()(gamenumT gnum) const218 	bool operator() (gamenumT gnum) const {
219 		if (static_cast<long>(gnum) < min_ ||
220 			static_cast<long>(gnum) > max_) return false;
221 		return true;
222 	}
223 };
224 
225 class SearchRangeElo : public SearchRange<eloT> {
226 protected:
227 	eloT (IndexEntry::* fElo1_) (const NameBase*) const;
228 	eloT (IndexEntry::* fElo2_) (const NameBase*) const;
229 	const NameBase* nb_;
230 
231 public:
SearchRangeElo(const scidBaseT * base,const char * range,eloT (IndexEntry::* f1)(const NameBase *)const,eloT (IndexEntry::* f2)(const NameBase *)const=0)232 	SearchRangeElo(const scidBaseT* base,
233 	               const char* range,
234 	               eloT (IndexEntry::* f1) (const NameBase*) const,
235 	               eloT (IndexEntry::* f2) (const NameBase*) const = 0)
236 	: SearchRange<eloT>(base, range, 0), fElo1_(f1), fElo2_(f2) {
237 		nb_ = base_->getNameBase();
238 	}
239 
operator ()(gamenumT gnum) const240 	bool operator() (gamenumT gnum) const {
241 		long v1 = (base_->getIndexEntry(gnum)->*fElo1_)(nb_);
242 		long v2 = min_;
243 		if (fElo2_ != 0) v2 = (base_->getIndexEntry(gnum)->*fElo2_)(nb_);
244 		if (v1 < min_ || v1 > max_ || v2 < min_ || v2 > max_) return false;
245 		return true;
246 	}
247 };
248 
249 class SearchRangeEloDiff : public SearchRangeElo {
250 public:
SearchRangeEloDiff(const scidBaseT * base,const char * range,eloT (IndexEntry::* f1)(const NameBase *)const,eloT (IndexEntry::* f2)(const NameBase *)const)251 	SearchRangeEloDiff(const scidBaseT* base,
252 	                   const char* range,
253 	                   eloT (IndexEntry::* f1) (const NameBase*) const,
254 	                   eloT (IndexEntry::* f2) (const NameBase*) const)
255 	: SearchRangeElo(base, range, f1, f2) {}
256 
operator ()(gamenumT gnum) const257 	bool operator() (gamenumT gnum) const {
258 		long v1 = (base_->getIndexEntry(gnum)->*fElo1_)(nb_);
259 		long v2 = (base_->getIndexEntry(gnum)->*fElo2_)(nb_);
260 		long v = v1 - v2;
261 		if (v < min_ || v > max_) return false;
262 		return true;
263 	}
264 };
265 
266 /**
267  * class SearchParam - criteria for the search
268  *
269  * a SearchParam is a string pair <criteria,value>.
270  * criteria should start with '-' and can include optional trailing chars:
271  *     '!' look for games that do _not_ match the criteria
272  *     '|' look for games that matches the previous criteria _or_ this critera
273  *         _or_ operations can be concatenated, so -player a -player| b -player| c
274  *         means player contains 'a' or 'b' or 'c'
275  * This class do not check if criteria is valid.
276  */
277 class SearchParam {
278 	std::string op_;
279 	const char* value_;
280 	bool opNot_;
281 	bool opOr_;
282 
283 public:
SearchParam(const char * op,const char * value)284 	SearchParam(const char* op, const char* value)
285 	: value_(value), opNot_(false), opOr_(false) {
286 		if (op != 0 && value_ != 0 && value_[0] != 0) {
287 			op_ = op;
288 			if (op_.length() > 1 && op_[0] == '-') {
289 				op_.erase(0, 1);
290 
291 				size_t extra = op_.find_last_not_of("!|");
292 				if (extra == std::string::npos) {
293 					op_.clear();
294 				} else if (++extra < op_.length()) {
295 					if (op_.find('!', extra) != std::string::npos) opNot_ = true;
296 					if (op_.find('|', extra) != std::string::npos) opOr_ = true;
297 					op_.erase(extra);
298 				}
299 			}
300 		}
301 	}
302 
invalidate()303 	void invalidate() {
304 		op_.clear();
305 		opNot_ = false;
306 		opOr_ = false;
307 	}
308 
operator !() const309 	bool operator!() const { return op_.empty(); }
operator ==(const char * s) const310 	bool operator==(const char* s) const {
311 		// Compares @s with op_ (op_ do not contain the starting '-' char
312 		// and the optional trailing '!' and '|' chars.
313 		return op_ == s;
314 	}
315 
getValue() const316 	const char* getValue() const { return value_; }
isNot() const317 	bool isNot() const { return opNot_; }
isOr() const318 	bool isOr() const { return opOr_; }
319 };
320 
321 /**
322  * parseParams() - parse search criteria
323  *
324  * Return: a vector containing the SearchParams for the search
325  * and set @filterOp to the requested operation.
326  */
parseParams(int argc,const char ** argv,filterOpT & filterOp)327 std::vector<SearchParam> parseParams(int argc, const char ** argv, filterOpT& filterOp) {
328 	std::vector<SearchParam> res;
329 
330 	for (int i=0; (i+1) < argc; i += 2) {
331 		SearchParam p(argv[i], argv[i+1]);
332 		if (!p) continue;
333 
334 		if (p == "filter")
335 			filterOp = strGetFilterOp(p.getValue());
336 		else
337 			res.push_back(p);
338 	}
339 
340 	return res;
341 }
342 
343 /**
344  * gamesToSearch() - create the vector of games to be searched
345  * @base:      the scidBaseT to search
346  * @filter:    games to be searched
347  * @filterOp:  FILTEROP_RESET -> reset @filter to include all games
348  *             FILTEROP_AND   -> search only games in @filter
349  *             FILTEROP_OR    -> search only games not in @filter
350  *
351  * Return: a vector containing the games (game number) to be searched.
352  * If @filterOp == FILTEROP_RESET then
353  *         @filter is reset to include all games
354  *         @filterOp is changed to FILTEROP_AND
355  */
collectGames(HFilter & filter,filterOpT & filterOp)356 std::vector<gamenumT> collectGames(HFilter& filter, filterOpT& filterOp) {
357 	ASSERT(filter != 0);
358 
359 	std::vector<gamenumT> res;
360 	switch (filterOp) {
361 	case FILTEROP_RESET:
362 		filter->includeAll();
363 		filterOp = FILTEROP_AND;
364 		/* FALLTHRU */
365 	case FILTEROP_AND:
366 		res.resize(filter->size());
367 		std::copy(filter->begin(), filter->end(), res.begin());
368 		break;
369 	case FILTEROP_OR:
370 		HFilterInverted excluded(filter);
371 		res.resize(excluded.size());
372 		std::copy(excluded.begin(), excluded.end(), res.begin());
373 		break;
374 	}
375 	return res;
376 }
377 
378 /**
379  * doSearch() - partition the input range
380  * @itB:    begin of the range
381  * @itR:    previous partition point
382  * @itE:    end of the range
383  * @base:   the scidBaseT to search
384  * @param:  parameters for the search
385  *
386  * Return: the new partition point so that the range [itB, result) contains the matching games
387  * If @param is not found, invalidate it and return the previous partition point (@itR)
388  */
389 template<typename I>
doSearch(I itB,I itR,I itE,const scidBaseT * base,SearchParam & param)390 I doSearch(I itB, I itR, I itE, const scidBaseT* base, SearchParam& param) {
391 	if (param == "player") return std::stable_partition(itB, itE,
392 		SearchName(base, param.getValue(), NAME_PLAYER, &IndexEntry::GetWhite, &IndexEntry::GetBlack)
393 	);
394 	if (param == "white") return std::stable_partition(itB, itE,
395 		SearchName(base, param.getValue(), NAME_PLAYER, &IndexEntry::GetWhite)
396 	);
397 	if (param == "black") return std::stable_partition(itB, itE,
398 		SearchName(base, param.getValue(), NAME_PLAYER, &IndexEntry::GetBlack)
399 	);
400 	if (param == "event") return std::stable_partition(itB, itE,
401 		SearchName(base, param.getValue(), NAME_EVENT, &IndexEntry::GetEvent)
402 	);
403 	if (param == "site") return std::stable_partition(itB, itE,
404 		SearchName(base, param.getValue(), NAME_SITE, &IndexEntry::GetSite)
405 	);
406 	if (param == "sitecountry") return std::stable_partition(itB, itE,
407 		SearchSiteCountry(base, param.getValue())
408 	);
409 	if (param == "round") return std::stable_partition(itB, itE,
410 		SearchName(base, param.getValue(), NAME_ROUND, &IndexEntry::GetRound)
411 	);
412 	if (param == "date") return std::stable_partition(itB, itE,
413 		SearchRangeDate(base, param.getValue(), &IndexEntry::GetDate)
414 	);
415 	if (param == "eventdate") return std::stable_partition(itB, itE,
416 		SearchRangeDate(base, param.getValue(), &IndexEntry::GetEventDate)
417 	);
418 	if (param == "elo") return std::stable_partition(itB, itE,
419 		SearchRangeElo(base, param.getValue(), &IndexEntry::GetWhiteElo, &IndexEntry::GetBlackElo)
420 	);
421 	if (param == "welo") return std::stable_partition(itB, itE,
422 		SearchRangeElo(base, param.getValue(), &IndexEntry::GetWhiteElo)
423 	);
424 	if (param == "belo") return std::stable_partition(itB, itE,
425 		SearchRangeElo(base, param.getValue(), &IndexEntry::GetBlackElo)
426 	);
427 	if (param == "delo") return std::stable_partition(itB, itE,
428 		SearchRangeEloDiff(base, param.getValue(), &IndexEntry::GetWhiteElo, &IndexEntry::GetBlackElo)
429 	);
430 	if (param == "eco") return std::stable_partition(itB, itE,
431 		SearchRangeEco(base, param.getValue(), &IndexEntry::GetEcoCode)
432 	);
433 	if (param == "gnum") return std::stable_partition(itB, itE,
434 		SearchRangeGamenum(base, param.getValue())
435 	);
436 	if (param == "length") return std::stable_partition(itB, itE,
437 		SearchRange<ushort>(base, param.getValue(), &IndexEntry::GetNumHalfMoves)
438 	);
439 	if (param == "n_variations") return std::stable_partition(itB, itE,
440 		SearchRange<uint>(base, param.getValue(), &IndexEntry::GetVariationCount)
441 	);
442 	if (param == "n_comments") return std::stable_partition(itB, itE,
443 		SearchRange<uint>(base, param.getValue(), &IndexEntry::GetCommentCount)
444 	);
445 	if (param == "n_nags") return std::stable_partition(itB, itE,
446 		SearchRange<uint>(base, param.getValue(), &IndexEntry::GetNagCount)
447 	);
448 	if (param == "flag") return std::stable_partition(itB, itE,
449 		SearchFlag(base, param.getValue())
450 	);
451 	if (param == "result") return std::stable_partition(itB, itE,
452 		SearchResult(base, param.getValue())
453 	);
454 
455 	param.invalidate();
456 	return itR;
457 }
458 
459 } // End of anonymous namespace
460 
461 
462 /**
463  * search_index() - search for games using game's IndexEntry info
464  * @param base:     the scidBaseT to search
465  * @param filter:   the filter to be modified with the result of the search
466  *                  (RESET, AND, OR operations can be applied to the filter)
467  * @param argc:     number of argv elements
468  * @param argv:     an array of string pairs <criteria,value>
469  * @param progress: report search progress to UI
470  *
471  * This function perform a fast games search using the IndexEntry info.
472  * Criteria should start with the char '-' and can include the optional trailing chars
473  *     '!': meaning that we want the games that do _not_ match the <criteria,value> pair
474  *     '|': that allows to search for games that match at least one <criteria,value> in
475  *          a group of <criteria,value> pairs
476  *
477  * Names searches (-player, -white, -black, -event, -site -round) have special rules:
478  * - normal searches ignore case and spaces, so -player carlsen is equal to -player C a R L s en
479  * - player surname and name are usually separated by the char ,
480  * - exact searches (that do not ignore case and spaces) can be performed enclosing the searched
481  *   value in "" like -player "Carlsen, Magnus"
482  *
483  * Examples:
484  * -player "Carlsen, Magnus"
485  * means search for games played by (either as white or black) Carlsen (surname) Magnus (name)
486  *
487  * -player carlsen -black kramnik
488  * means (white name contains 'carlsen' || black name contains 'carlsen') && (black name contains 'kramnik')
489  *
490  * -white carlsen -black anand -black| kramnik -black| aronian
491  * means (white == carlsen) && (black == anand || black == kramnik || black == aronian)
492  * and will find the games played by carlsen as white against anand, or kramnik, or aronian
493  *
494  * -elo "2700 4000" -event! blitz
495  * means elo > 2700 && elo < 4000 && event != blitz
496  * and will find the games played by players with elo greater or equal than 2700, excluding the blitz events
497  *
498  * -welo "2700 4000" -belo|! "0 2700" -delo "-200 200"
499  * means (white elo > 2700 && white elo < 4000) || (belo is not in the range 0-2700) && ((welo - belo) > -200 && (welo - belo) < 200)
500  */
search_index(const scidBaseT * base,HFilter & filter,int argc,const char ** argv,const Progress & progress)501 errorT search_index(const scidBaseT* base, HFilter& filter, int argc,
502                     const char** argv, const Progress& progress) {
503 	ASSERT(base != 0);
504 	ASSERT(filter != 0);
505 
506 	filterOpT filterOp = FILTEROP_RESET;
507 	std::vector<SearchParam> params = parseParams(argc, argv, filterOp);
508 	std::vector<gamenumT> glist = collectGames(filter, filterOp);
509 
510 	// Partition glist so that the range [glist.begin(), it_res)
511 	// contains the matching games
512 	typedef std::vector<gamenumT>::iterator iter;
513 	iter it_begin = glist.begin();
514 	iter it_end = glist.end();
515 	iter it_res = glist.end();
516 	for (size_t i = 0, n = params.size(); i < n; i++) {
517 		if (!progress.report(i, n)) return ERROR_UserCancel;
518 
519 		if (params[i].isOr()) {
520 			it_begin = it_res;
521 			// Now the range [it_begin, it_end) include the games
522 			// that did not matched the previous AND criteria
523 		} else {
524 			it_end = it_res;
525 			it_begin = glist.begin();
526 			// Now the range [glist.begin(), it_res) include the games
527 			// that matched all the criteria until now.
528 			// Nonmatching games are in the range [it_end, glist.end())
529 			// and excluded from subsequent searches
530 		}
531 
532 		it_res = doSearch(it_begin, it_res, it_end, base, params[i]);
533 
534 		if (params[i].isNot()) {
535 			// The nonmatching games are in the range [it_res, it_end)
536 			// Rotate glist so that nonmatching games are in
537 			// the range [it_begin, it_res)
538 			std::rotate(it_begin, it_res, it_end);
539 			it_res = it_begin + (it_end - it_res);
540 		}
541 	}
542 
543 	if (filterOp == FILTEROP_AND) {
544 		// Remove nonmatching games from @filter
545 		for (iter it = it_res; it != glist.end(); ++it)
546 			filter->erase(*it);
547 	} else {
548 		// filterOp == FILTEROP_OR
549 		// Add matching games to @filter
550 		for (iter it = glist.begin(); it != it_res; ++it)
551 			filter->set(*it, 1);
552 	}
553 	progress.report(1,1);
554 
555 	return OK;
556 }
557 
558