1 /*
2  * Copyright (C) 2011  Gerd Lorscheid
3  * Copyright (C) 2011-2017  Fulvio Benini
4  *
5  * This file is part of Scid (Shane's Chess Information Database).
6  *
7  * Scid is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation.
10  *
11  * Scid is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Scid.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 /** @file
21  * Implements the SortCache class, which sorts the games of an @e Index.
22  */
23 
24 #include "sortcache.h"
25 #include "hfilter.h"
26 #include "index.h"
27 #include "misc.h"
28 #include <algorithm>
29 #include <climits>
30 #include <cstring>
31 #include <numeric>
32 
33 #ifndef MULTITHREADING_OFF
34 #include <thread>
35 
36 /**
37  * Blocks the current thread until the thread @e th_ finishes its execution.
38  */
th_join()39 void SortCache::th_join() {
40 	if (th_ != nullptr) {
41 		static_cast<std::thread*>(th_)->join();
42 		delete static_cast<std::thread*>(th_);
43 		th_ = nullptr;
44 	}
45 }
46 
47 /**
48  * Populate @e fullMap_ with the sorted ids of all the games contained into @e
49  * index_. This function can run in a worker thread and can be interrupted.
50  * It is necessary to invoke th_interrupt() or th_join() before modifying the
51  * SortCache object, or the associated Index and NameBase objects.
52  */
th_sort()53 void SortCache::th_sort() {
54 	size_t nGames = this->nGames_;
55 	gamenumT* v = this->fullMap_;
56 	gamenumT* begin = v;
57 	gamenumT* end = v + nGames;
58 	auto comp = SortCache::CmpLess(this);
59 
60 	std::iota(begin, end, 0);
61 
62 	// An interruptible implementation of:
63 	// std::make_heap(v.begin(), v.end(), comp);
64 	ASSERT(nGames < size_t(INT_MAX / 2));
65 	const int lastNode = static_cast<int>(nGames) - 1;
66 	const auto lastRoot = (lastNode - 1) / 2;
67 	for (auto node = lastRoot; node >= 0; --node) {
68 		if (this->th_interrupt_)
69 			return;
70 		// Sift down @e node
71 		for (auto toSift = node;;) {
72 			const auto leftChild = 2 * toSift + 1;
73 			const auto rightChild = 2 * toSift + 2;
74 			if (leftChild > lastNode)
75 				break;
76 			int maxChild = (rightChild <= lastNode && comp(v[leftChild], v[rightChild]))
77 			                   ? rightChild
78 			                   : leftChild;
79 			if (!comp(v[toSift], v[maxChild]))
80 				break;
81 			std::swap(v[toSift], v[maxChild]);
82 			toSift = maxChild;
83 		}
84 	}
85 	ASSERT(std::is_heap(begin, end, comp));
86 
87 	// An interruptible implementation of:
88 	// std::sort_heap(v.begin(), v.end(), comp);
89 	for (auto it = end; it != begin; --it) {
90 		if (this->th_interrupt_)
91 			return;
92 		std::pop_heap(begin, it, comp);
93 	}
94 	ASSERT(std::is_sorted(begin, end, comp));
95 
96 	this->valid_fullMap_ = true;
97 }
98 
99 #else
th_join()100 void SortCache::th_join() {}
th_sort()101 void SortCache::th_sort() {}
102 #endif
103 
104 
SortCache(const Index * idx,const NameBase * nbase)105 SortCache::SortCache(const Index* idx, const NameBase* nbase)
106     : nGames_(0), valid_fullMap_(false), th_interrupt_(false),
107       partialHash_(false), fullMap_(NULL), th_(NULL), hash_(NULL), index_(idx),
108       nbase_(nbase), refCount_(0) {}
109 
~SortCache()110 SortCache::~SortCache() {
111 	th_interrupt();
112 	delete[] hash_;
113 	delete[] fullMap_;
114 }
115 
create(const Index * idx,const NameBase * nb,const char * criteria)116 SortCache* SortCache::create(const Index* idx, const NameBase* nb,
117                              const char* criteria) {
118 	ASSERT(idx != NULL && nb != NULL && criteria != NULL);
119 
120 	static const char fields[] = {
121 	    SORTING_date,       SORTING_year,         SORTING_event,
122 	    SORTING_site,       SORTING_round,        SORTING_white,
123 	    SORTING_black,      SORTING_eco,          SORTING_result,
124 	    SORTING_moveCount,  SORTING_avgElo,       SORTING_country,
125 	    SORTING_deleted,    SORTING_eventdate,    SORTING_whiteelo,
126 	    SORTING_blackelo,   SORTING_commentcount, SORTING_varcount,
127 	    SORTING_nagcount,   SORTING_resultwin,    SORTING_resultdraw,
128 	    SORTING_resultloss, SORTING_rating,       SORTING_number,
129 	    SORTING_sentinel};
130 	static const char* fields_end = fields + sizeof(fields);
131 
132 	if (*criteria == '\0') // Invalid empty criteria.
133 		return NULL;
134 
135 	SortCache* sc = new SortCache(idx, nb);
136 
137 	// Parse the sorting criteria.
138 	size_t i = 0;
139 	for (const char *it = criteria; *it != 0; ++it) {
140 		bool valid = std::find(fields, fields_end, *it) != fields_end;
141 		sc->criteria_[i++] = *it++;
142 		bool reverse = (*it != '+');
143 		sc->criteria_[i++] = reverse ? 1 : 0;
144 		if (!valid                         // Unknown field
145 		    || (reverse && *it != '-')     // Invalid asc/desc param
146 		    || i >= sizeof(sc->criteria_)) // No space left for SORTING_sentinel
147 		{
148 			delete sc;
149 			return NULL;
150 		}
151 	}
152 	sc->criteria_[i] = SORTING_sentinel;
153 
154 	sc->generateHashCache();
155 	sc->sortAsynchronously();
156 
157 	return sc;
158 }
159 
select(size_t row_offset,size_t row_count,const HFilter & filter,gamenumT * result) const160 size_t SortCache::select(size_t row_offset, size_t row_count,
161                          const HFilter& filter, gamenumT* result) const {
162 	ASSERT(filter != NULL && filter->size() <= nGames_);
163 	ASSERT(result != NULL);
164 
165 	size_t maxResults = filter->size();
166 	if (row_count == 0 || row_offset >= maxResults)
167 		return 0;
168 
169 	size_t row_end = std::min(row_offset + row_count, maxResults);
170 
171 	if (!valid_fullMap_) {
172 		gamenumT* v = new gamenumT[maxResults];
173 		gamenumT* v_end = v + maxResults;
174 		if (maxResults == nGames_) {
175 			// std::iota(v, v_end, 0);
176 			for (gamenumT i = 0; i < maxResults; ++i) {
177 				v[i] = i;
178 			}
179 		} else {
180 			std::copy(filter->begin(), filter->end(), v);
181 		}
182 
183 		size_t skip = 0;
184 		if (row_offset > 1000) {
185 			skip = row_offset;
186 			std::nth_element(v, v + row_offset, v_end, CmpLess(this));
187 		}
188 		std::partial_sort(v + skip, v + row_end, v_end, CmpLess(this));
189 		std::copy(v + row_offset, v + row_end, result);
190 		delete[] v;
191 	} else {
192 		if (maxResults == nGames_) {
193 			std::copy(fullMap_ + row_offset, fullMap_ + row_end, result);
194 		} else {
195 			size_t filterCount = 0;
196 			size_t i = 0;
197 			for (; filterCount < row_offset; i++) {
198 				if (filter->get(fullMap_[i]) != 0)
199 					filterCount++;
200 			}
201 			for (; filterCount != row_end; i++) {
202 				if (filter->get(fullMap_[i]) != 0) {
203 					*result++ = fullMap_[i];
204 					filterCount++;
205 				}
206 			}
207 		}
208 	}
209 
210 	return row_end - row_offset;
211 }
212 
sortedPosition(gamenumT gameId,const HFilter & filter) const213 size_t SortCache::sortedPosition(gamenumT gameId, const HFilter& filter) const {
214 	ASSERT(filter != 0 && filter->size() <= nGames_);
215 	ASSERT(gameId < nGames_ && filter->get(gameId) != 0);
216 
217 	size_t res = 0;
218 	if (valid_fullMap_) {
219 		for (gamenumT i = 0; i < nGames_; i++) {
220 			if (filter->get(fullMap_[i]) == 0)
221 				continue;
222 			if (fullMap_[i] == gameId)
223 				break;
224 			res++;
225 		}
226 	} else {
227 		CmpLess comp(this);
228 		for (gamenumT i = 0; i < nGames_; i++) {
229 			if (filter->get(i) == 0)
230 				continue;
231 			if (comp(i, gameId))
232 				++res;
233 		}
234 	}
235 	return res;
236 }
237 
checkForChanges(gamenumT id)238 void SortCache::checkForChanges(gamenumT id) {
239 	th_interrupt();
240 	if (id >= nGames_) {
241 		generateHashCache();
242 		sortAsynchronously();
243 	} else {
244 		hash_[id] = calcHash(id);
245 		if (valid_fullMap_) {
246 			gamenumT* begin = fullMap_;
247 			gamenumT* end = fullMap_ + nGames_;
248 			gamenumT* it = std::find(begin, end, id);
249 			ASSERT(it != end);
250 			// Reposition the game if necessary:
251 			// - to the left if it compares less than the previous element;
252 			// - to the right if the next element is lower.
253 			CmpLess comp(this);
254 			if (it != begin && comp(*it, *(it - 1))) {
255 				std::rotate(
256 					std::upper_bound(begin, it, *it, comp),
257 					it, it + 1);
258 			} else if (it + 1 != end && comp(*(it + 1), *it)) {
259 				std::rotate(
260 					it, it + 1,
261 					std::lower_bound(it + 1, end, *it, comp));
262 			}
263 			ASSERT(std::is_sorted(begin, end, comp));
264 		} else {
265 			sortAsynchronously();
266 		}
267 	}
268 }
269 
270 /*
271  * Calculate the hashes of all games and store them into @e hash_.
272  */
generateHashCache()273 void SortCache::generateHashCache() {
274 	ASSERT(th_ == nullptr);
275 
276 	valid_fullMap_ = false;
277 	nGames_ = index_->GetNumGames();
278 
279 	// Generate the hash table.
280 	delete[] hash_;
281 	hash_ = new uint32_t[nGames_];
282 	for (gamenumT i = 0; i < nGames_; i++) {
283 		hash_[i] = calcHash(i);
284 	}
285 }
286 
287 /*
288  * Start a background thread that will sort the gameIds and will populate @e
289  * fullMap_.
290  */
sortAsynchronously()291 void SortCache::sortAsynchronously() {
292 	ASSERT(th_ == nullptr);
293 
294 #ifndef MULTITHREADING_OFF
295 	delete[] fullMap_;
296 	fullMap_ = new gamenumT[nGames_];
297 	th_ = new std::thread(&SortCache::th_sort, this);
298 #endif
299 }
300 
301 /*
302  * Compare two games according to @e criteria_.
303  * The @e index_ is accessed only if the games' hashes are equal.
304  * @param g1: the id of the first game.
305  * @param g2: the id of the second game.
306  * @returns true if @e g1 is ordered before @e g2.
307  */
operator ()(gamenumT g1,gamenumT g2) const308 bool SortCache::CmpLess::operator()(gamenumT g1, gamenumT g2) const {
309 	ASSERT(g1 < sc_->nGames_ && g2 < sc_->nGames_);
310 
311 	if (sc_->hash_[g1] != sc_->hash_[g2])
312 		return sc_->hash_[g1] < sc_->hash_[g2];
313 	if (!sc_->partialHash_)
314 		return g1 < g2;
315 
316 	int cmp = sc_->fullCompare(g1, g2);
317 	if (cmp != 0)
318 		return cmp < 0;
319 
320 	return g1 < g2;
321 }
322 
323 static const int RESULT_SORT[] = { 0, 3, 1, 2 };
nameComp(const NameBase * nbase,nameT nt,idNumberT id1,idNumberT id2)324 static int nameComp(const NameBase* nbase, nameT nt, idNumberT id1,
325                     idNumberT id2) {
326 	ASSERT(nbase != NULL);
327 	return (id1 == id2) ? 0
328 	                    : strCaseCompare(nbase->GetName(nt, id1),
329 	                                     nbase->GetName(nt, id2));
330 }
331 
332 /*
333  * Compare two games according to @e criteria_.
334  * @param left: the id of the first game.
335  * @param right: the id of the second game.
336  * @returns
337  * - <0 if @e left is ordered before @e right.
338  * - >0 if @e right is ordered before @e left.
339  * - 0 otherwise.
340  */
fullCompare(gamenumT left,gamenumT right) const341 int SortCache::fullCompare(gamenumT left, gamenumT right) const {
342 	const IndexEntry *ie1 = index_->GetEntry(left);
343 	const IndexEntry *ie2 = index_->GetEntry(right);
344 
345 	for (const char* field = criteria_; *field != SORTING_sentinel; field += 2) {
346 		int res;
347 		switch (*field) {
348 		case SORTING_date:
349 			res = (int)ie1->GetDate() - (int)ie2->GetDate();
350 			break;
351 
352 		case SORTING_year:
353 			res = (int)ie1->GetYear() - (int)ie2->GetYear();
354 			break;
355 
356 		case SORTING_eco:
357 			res = (int)ie1->GetEcoCode() - (int)ie2->GetEcoCode();
358 			break;
359 
360 		case SORTING_moveCount:
361 			res = (int)ie1->GetNumHalfMoves() - (int)ie2->GetNumHalfMoves();
362 			break;
363 
364 		case SORTING_white:
365 			res = nameComp(nbase_, NAME_PLAYER, ie1->GetWhite(), ie2->GetWhite());
366 			break;
367 
368 		case SORTING_black:
369 			res = nameComp(nbase_, NAME_PLAYER, ie1->GetBlack(), ie2->GetBlack());
370 			break;
371 
372 		case SORTING_event:
373 			res = nameComp(nbase_, NAME_EVENT, ie1->GetEvent(), ie2->GetEvent());
374 			break;
375 
376 		case SORTING_site:
377 			res = nameComp(nbase_, NAME_SITE, ie1->GetSite(), ie2->GetSite());
378 			break;
379 
380 		case SORTING_round: {
381 			idNumberT id1 = ie1->GetRound();
382 			idNumberT id2 = ie2->GetRound();
383 			res = (id1 == id2)
384 			          ? 0
385 			          : strCompareRound(nbase_->GetName(NAME_ROUND, id1),
386 			                            nbase_->GetName(NAME_ROUND, id2));
387 			break;
388 		}
389 
390 		case SORTING_resultwin:
391 			res = (ie1->GetResult() == RESULT_White ? 1 : 0) - (ie2->GetResult() == RESULT_White ? 1 : 0);
392 			break;
393 
394 		case SORTING_resultdraw:
395 			res = (ie1->GetResult() == RESULT_Draw ? 1 : 0) - (ie2->GetResult() == RESULT_Draw ? 1 : 0);
396 			break;
397 
398 		case SORTING_resultloss:
399 			res = (ie1->GetResult() == RESULT_Black ? 1 : 0) - (ie2->GetResult() == RESULT_Black ? 1 : 0);
400 			break;
401 
402 		case SORTING_result:
403 			res = RESULT_SORT[ie1->GetResult()] - RESULT_SORT[ie2->GetResult()];
404 			break;
405 
406 		case SORTING_avgElo:  // Average Elo rating:
407 			{
408 				int r1 = ie1->GetWhiteElo(nbase_) + ie1->GetBlackElo(nbase_);
409 				int r2 = ie2->GetWhiteElo(nbase_) + ie2->GetBlackElo(nbase_);
410 				res = r1 - r2;
411 			}
412 			break;
413 
414 		case SORTING_country:  // Last 3 characters of site field:
415 			{
416 				const char* sOne = ie1->GetSiteName(nbase_);
417 				const char* sTwo = ie2->GetSiteName(nbase_);
418 				size_t slenOne = std::strlen(sOne);
419 				size_t slenTwo = std::strlen(sTwo);
420 				if (slenOne > 3) { sOne += slenOne - 3; }
421 				if (slenTwo > 3) { sTwo += slenTwo - 3; }
422 				res = strCaseCompare (sOne, sTwo);
423 			}
424 			break;
425 
426 		case SORTING_deleted:
427 			res = (int)ie1->GetDeleteFlag() - (int)ie2->GetDeleteFlag();
428 			break;
429 
430 		case SORTING_eventdate:
431 			res = (int)ie1->GetEventDate() - (int)ie2->GetEventDate();
432 			break;
433 
434 		case SORTING_whiteelo:
435 			res = (int)ie1->GetWhiteElo(nbase_) - (int)ie2->GetWhiteElo(nbase_);
436 			break;
437 
438 		case SORTING_blackelo:
439 			res = (int)ie1->GetBlackElo(nbase_) - (int)ie2->GetBlackElo(nbase_);
440 			break;
441 
442 		case SORTING_commentcount:
443 			res = (int) ie1->GetCommentCount() - (int) ie2->GetCommentCount();
444 			break;
445 
446 		case SORTING_varcount:
447 			res = (int) ie1->GetVariationCount() - (int) ie2->GetVariationCount();
448 			break;
449 
450 		case SORTING_nagcount:
451 			res = (int) ie1->GetNagCount() - (int) ie2->GetNagCount();
452 			break;
453 
454 		case SORTING_rating:
455 			res = (int)ie1->GetRating(nbase_) - (int)ie2->GetRating(nbase_);
456 			break;
457 
458 		case SORTING_number:
459 			res = (int) left - (int) right;
460 			break;
461 
462 		default:    // Should never happen:
463 			ASSERT(0);
464 			return 0;
465 		}
466 
467 		if (res != 0)
468 			return *(field + 1) ? -res : res;
469 	}
470 
471     return 0;
472 }
473 
474 /*
475  * Calculate an order-preserving hash corresponding to the current criteria.
476  * @param gameId: the id of the game whose hash should be calculated.
477  * @returns the hash value.
478  */
calcHash(gamenumT gameId)479 uint32_t SortCache::calcHash(gamenumT gameId) {
480 	uint64_t retValue = 0;
481 	const size_t nHashBits = 32;
482 	size_t totalBitsUsed = 0;
483 	const IndexEntry* ie = index_->GetEntry(gameId);
484 
485 	for (const char* field = criteria_; *field != SORTING_sentinel; ++field) {
486 		uint32_t value;
487 		size_t bitsUsed;
488 		switch (*field) {
489 			case SORTING_white:
490 				value = strStartHash(ie->GetWhiteName(nbase_));
491 				bitsUsed = nHashBits;
492 				partialHash_ = true;
493 				break;
494 			case SORTING_black:
495 				value = strStartHash(ie->GetBlackName(nbase_));
496 				bitsUsed = nHashBits;
497 				partialHash_ = true;
498 				break;
499 			case SORTING_site:
500 				value = strStartHash(ie->GetSiteName(nbase_));
501 				bitsUsed = nHashBits;
502 				partialHash_ = true;
503 				break;
504 			case SORTING_event:
505 				value = strStartHash(ie->GetEventName(nbase_));
506 				bitsUsed = nHashBits;
507 				partialHash_ = true;
508 				break;
509 			case SORTING_round:
510 				value = strGetUnsigned(ie->GetRoundName(nbase_));
511 				bitsUsed = nHashBits;
512 				partialHash_ = true;
513 				break;
514 			case SORTING_country:
515 			{
516 				const char *scountry = ie->GetSiteName (nbase_);
517 				size_t slen = std::strlen(scountry);
518 				if (slen > 3)
519 					scountry += slen - 3;
520 				value = strStartHash(scountry);
521 				bitsUsed = nHashBits;
522 				partialHash_ = true;
523 				break;
524 			}
525 			case SORTING_date:
526 				value = ie->GetDate();
527 				bitsUsed = 24;
528 				break;
529 			case SORTING_eventdate:
530 				value = ie->GetEventDate();
531 				bitsUsed = 32;
532 				break;
533 			case SORTING_year:
534 				value = ie->GetYear();
535 				bitsUsed = 16;
536 				break;
537 			case SORTING_whiteelo:
538 				value = ie->GetWhiteElo(nbase_);
539 				bitsUsed = 16;
540 				break;
541 			case SORTING_blackelo:
542 				value = ie->GetBlackElo(nbase_);
543 				bitsUsed = 16;
544 				break;
545 			case SORTING_avgElo:
546 				value = ie->GetWhiteElo(nbase_) + ie->GetBlackElo(nbase_);
547 				bitsUsed = 16;
548 				break;
549 			case SORTING_result:
550 				value = RESULT_SORT[ie->GetResult()];
551 				bitsUsed = 8;
552 				break;
553 			case SORTING_resultwin:
554 				value = ie->GetResult() == RESULT_White ? 1 : 0;
555 				bitsUsed = 8;
556 				break;
557 			case SORTING_resultdraw:
558 				value = ie->GetResult() == RESULT_Draw ? 1 : 0;
559 				bitsUsed = 8;
560 				break;
561 			case SORTING_resultloss:
562 				value = ie->GetResult() == RESULT_Black ? 1 : 0;
563 				bitsUsed = 8;
564 				break;
565 			case SORTING_moveCount:
566 				value = ie->GetNumHalfMoves();
567 				bitsUsed = 16;
568 				break;
569 			case SORTING_eco:
570 				value = ie->GetEcoCode();
571 				bitsUsed = 16;
572 				break;
573 			case SORTING_commentcount:
574 				value = ie->GetCommentCount();
575 				bitsUsed = 16;
576 				break;
577 			case SORTING_varcount:
578 				value = ie->GetVariationCount();
579 				bitsUsed = 16;
580 				break;
581 			case SORTING_nagcount:
582 				value = ie->GetNagCount();
583 				bitsUsed = 16;
584 				break;
585 			case SORTING_deleted:
586 				value = (ie->GetDeleteFlag() ? 1 : 0);
587 				bitsUsed = 8;
588 				break;
589 			case SORTING_rating:
590 				value = ie->GetRating(nbase_);
591 				bitsUsed = 8;
592 				break;
593 			case SORTING_number:
594 				value = gameId;
595 				bitsUsed = 32;
596 				break;
597 			default:    // Should never happen:
598 				ASSERT(0);
599 				partialHash_ = true;
600 				return 0;
601 		}
602 
603 		// If reverse order, just negate the cache value
604 		if (*++field) {
605 			value = ~value;
606 			if (sizeof(value) * 8 > bitsUsed) {
607 				// Clear the unused top bits
608 				value <<= sizeof(value) * 8 - bitsUsed;
609 				value >>= sizeof(value) * 8 - bitsUsed;
610 			}
611 		}
612 		// Combine with previous hash value
613 		retValue <<= bitsUsed;
614 		retValue |= value;
615 		totalBitsUsed += bitsUsed;
616 
617 		// If not all search attributes fit, then it is a partial hash
618 		if (totalBitsUsed > nHashBits) {
619 			retValue >>= totalBitsUsed - nHashBits;
620 			partialHash_ = true;
621 			break;
622 		}
623 		if (totalBitsUsed == nHashBits) {
624 			if (*(field + 1) != SORTING_sentinel)
625 				partialHash_ = true;
626 			break;
627 		}
628 	}
629 
630 	return static_cast<uint32_t>(retValue);
631 }
632