1 //
2 // $Id$
3 //
4 
5 //
6 // Copyright (c) 2001-2016, Andrew Aksyonoff
7 // Copyright (c) 2008-2016, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 #include "sphinx.h"
17 #include "sphinxint.h"
18 #include "sphinxjson.h"
19 
20 #include <time.h>
21 #include <math.h>
22 
23 #if !USE_WINDOWS
24 #include <unistd.h>
25 #include <sys/time.h>
26 #endif
27 
28 //////////////////////////////////////////////////////////////////////////
29 // TRAITS
30 //////////////////////////////////////////////////////////////////////////
31 
SetState(const CSphMatchComparatorState & tState)32 void ISphMatchSorter::SetState ( const CSphMatchComparatorState & tState )
33 {
34 	m_tState = tState;
35 	m_tState.m_iNow = (DWORD) time ( NULL );
36 }
37 
38 
39 /// groupby key type
40 typedef int64_t				SphGroupKey_t;
41 
42 
43 /// base grouper (class that computes groupby key)
44 class CSphGrouper
45 {
46 public:
~CSphGrouper()47 	virtual					~CSphGrouper () {}
48 	virtual SphGroupKey_t	KeyFromValue ( SphAttr_t uValue ) const = 0;
49 	virtual SphGroupKey_t	KeyFromMatch ( const CSphMatch & tMatch ) const = 0;
50 	virtual void			GetLocator ( CSphAttrLocator & tOut ) const = 0;
51 	virtual ESphAttr		GetResultType () const = 0;
SetStringPool(const BYTE *)52 	virtual void			SetStringPool ( const BYTE * ) {}
CanMulti() const53 	virtual bool			CanMulti () const { return true; }
54 };
55 
56 
HasString(const CSphMatchComparatorState * pState)57 static bool HasString ( const CSphMatchComparatorState * pState )
58 {
59 	assert ( pState );
60 
61 	for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
62 	{
63 		if ( pState->m_eKeypart[i]==SPH_KEYPART_STRING || pState->m_eKeypart[i]==SPH_KEYPART_STRINGPTR || ( pState->m_tSubKeys[i].m_sKey.cstr() ) )
64 			return true;
65 	}
66 
67 	return false;
68 }
69 
70 
71 /// match-sorting priority queue traits
72 class CSphMatchQueueTraits : public ISphMatchSorter, ISphNoncopyable
73 {
74 protected:
75 	CSphMatch *					m_pData;
76 	int							m_iUsed;
77 	int							m_iSize;
78 	const bool					m_bUsesAttrs;
79 
80 private:
81 	const int					m_iDataLength;
82 
83 public:
84 	/// ctor
CSphMatchQueueTraits(int iSize,bool bUsesAttrs)85 	CSphMatchQueueTraits ( int iSize, bool bUsesAttrs )
86 		: m_iUsed ( 0 )
87 		, m_iSize ( iSize )
88 		, m_bUsesAttrs ( bUsesAttrs )
89 		, m_iDataLength ( iSize )
90 	{
91 		assert ( iSize>0 );
92 		m_pData = new CSphMatch [ m_iDataLength ];
93 		assert ( m_pData );
94 
95 		m_tState.m_iNow = (DWORD) time ( NULL );
96 		m_iMatchCapacity = m_iDataLength;
97 	}
98 
99 	/// dtor
~CSphMatchQueueTraits()100 	virtual ~CSphMatchQueueTraits ()
101 	{
102 		for ( int i=0; i<m_iDataLength; ++i )
103 			m_tSchema.FreeStringPtrs ( m_pData+i );
104 		SafeDeleteArray ( m_pData );
105 	}
106 
107 public:
UsesAttrs() const108 	bool				UsesAttrs () const										{ return m_bUsesAttrs; }
GetLength() const109 	virtual int			GetLength () const										{ return m_iUsed; }
GetDataLength() const110 	virtual int			GetDataLength () const									{ return m_iDataLength; }
111 
CanMulti() const112 	virtual bool CanMulti () const
113 	{
114 		return !HasString ( &m_tState );
115 	}
116 };
117 
118 //////////////////////////////////////////////////////////////////////////
119 // SORTING QUEUES
120 //////////////////////////////////////////////////////////////////////////
121 
122 template < typename COMP >
123 struct CompareIndex_fn
124 {
125 	const CSphMatch * m_pBase;
126 	const CSphMatchComparatorState * m_pState;
127 
CompareIndex_fnCompareIndex_fn128 	CompareIndex_fn ( const CSphMatch * pBase, const CSphMatchComparatorState * pState )
129 		: m_pBase ( pBase )
130 		, m_pState ( pState )
131 	{}
132 
IsLessCompareIndex_fn133 	bool IsLess ( int a, int b ) const
134 	{
135 		return COMP::IsLess ( m_pBase[b], m_pBase[a], *m_pState );
136 	}
137 };
138 
139 /// heap sorter
140 /// plain binary heap based PQ
141 template < typename COMP, bool NOTIFICATIONS >
142 class CSphMatchQueue : public CSphMatchQueueTraits
143 {
144 public:
145 	/// ctor
CSphMatchQueue(int iSize,bool bUsesAttrs)146 	CSphMatchQueue ( int iSize, bool bUsesAttrs )
147 		: CSphMatchQueueTraits ( iSize, bUsesAttrs )
148 	{
149 		if_const ( NOTIFICATIONS )
150 			m_dJustPopped.Reserve(1);
151 	}
152 
153 	/// check if this sorter does groupby
IsGroupby() const154 	virtual bool IsGroupby () const
155 	{
156 		return false;
157 	}
158 
GetWorst() const159 	virtual const CSphMatch * GetWorst() const
160 	{
161 		return m_pData;
162 	}
163 
164 	/// add entry to the queue
Push(const CSphMatch & tEntry)165 	virtual bool Push ( const CSphMatch & tEntry )
166 	{
167 		m_iTotal++;
168 
169 		if_const ( NOTIFICATIONS )
170 		{
171 			m_iJustPushed = 0;
172 			m_dJustPopped.Resize(0);
173 		}
174 
175 		if ( m_iUsed==m_iSize )
176 		{
177 			// if it's worse that current min, reject it, else pop off current min
178 			if ( COMP::IsLess ( tEntry, m_pData[0], m_tState ) )
179 				return true;
180 			else
181 				Pop ();
182 		}
183 
184 		// do add
185 		m_tSchema.CloneMatch ( m_pData+m_iUsed, tEntry );
186 
187 		if_const ( NOTIFICATIONS )
188 			m_iJustPushed = tEntry.m_uDocID;
189 
190 		int iEntry = m_iUsed++;
191 
192 		// sift up if needed, so that worst (lesser) ones float to the top
193 		while ( iEntry )
194 		{
195 			int iParent = ( iEntry-1 ) >> 1;
196 			if ( !COMP::IsLess ( m_pData[iEntry], m_pData[iParent], m_tState ) )
197 				break;
198 
199 			// entry is less than parent, should float to the top
200 			Swap ( m_pData[iEntry], m_pData[iParent] );
201 			iEntry = iParent;
202 		}
203 
204 		return true;
205 	}
206 
207 	/// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)208 	virtual bool PushGrouped ( const CSphMatch &, bool )
209 	{
210 		assert ( 0 );
211 		return false;
212 	}
213 
214 	/// remove root (ie. top priority) entry
Pop()215 	virtual void Pop ()
216 	{
217 		assert ( m_iUsed );
218 		if ( !(--m_iUsed) ) // empty queue? just return
219 			return;
220 
221 		// make the last entry my new root
222 		Swap ( m_pData[0], m_pData[m_iUsed] );
223 		m_tSchema.FreeStringPtrs ( &m_pData[m_iUsed] );
224 
225 		if_const ( NOTIFICATIONS )
226 		{
227 			if ( m_dJustPopped.GetLength() )
228 				m_dJustPopped[0] = m_pData[m_iUsed].m_uDocID;
229 			else
230 				m_dJustPopped.Add ( m_pData[m_iUsed].m_uDocID );
231 		}
232 
233 		// sift down if needed
234 		int iEntry = 0;
235 		for ( ;; )
236 		{
237 			// select child
238 			int iChild = (iEntry<<1) + 1;
239 			if ( iChild>=m_iUsed )
240 				break;
241 
242 			// select smallest child
243 			if ( iChild+1<m_iUsed )
244 				if ( COMP::IsLess ( m_pData[iChild+1], m_pData[iChild], m_tState ) )
245 					iChild++;
246 
247 			// if smallest child is less than entry, do float it to the top
248 			if ( COMP::IsLess ( m_pData[iChild], m_pData[iEntry], m_tState ) )
249 			{
250 				Swap ( m_pData[iChild], m_pData[iEntry] );
251 				iEntry = iChild;
252 				continue;
253 			}
254 
255 			break;
256 		}
257 	}
258 
259 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)260 	int Flatten ( CSphMatch * pTo, int iTag )
261 	{
262 		assert ( m_iUsed>=0 );
263 		pTo += m_iUsed;
264 		int iCopied = m_iUsed;
265 		while ( m_iUsed>0 )
266 		{
267 			--pTo;
268 			m_tSchema.FreeStringPtrs ( pTo );
269 			Swap ( *pTo, *m_pData );
270 			if ( iTag>=0 )
271 				pTo->m_iTag = iTag;
272 			Pop ();
273 		}
274 		m_iTotal = 0;
275 		return iCopied;
276 	}
277 
Finalize(ISphMatchProcessor & tProcessor,bool bCallProcessInResultSetOrder)278 	void Finalize ( ISphMatchProcessor & tProcessor, bool bCallProcessInResultSetOrder )
279 	{
280 		if ( !GetLength() )
281 			return;
282 
283 		if ( !bCallProcessInResultSetOrder )
284 		{
285 			// just evaluate in heap order
286 			CSphMatch * pCur = m_pData;
287 			const CSphMatch * pEnd = m_pData + m_iUsed;
288 			while ( pCur<pEnd )
289 			{
290 				tProcessor.Process ( pCur++ );
291 			}
292 		} else
293 		{
294 			// means final-stage calls will be evaluated
295 			// a) over the final, pre-limit result set
296 			// b) in the final result set order
297 			CSphFixedVector<int> dIndexes ( GetLength() );
298 			ARRAY_FOREACH ( i, dIndexes )
299 				dIndexes[i] = i;
300 			sphSort ( dIndexes.Begin(), dIndexes.GetLength(), CompareIndex_fn<COMP> ( m_pData, &m_tState ) );
301 
302 			ARRAY_FOREACH ( i, dIndexes )
303 			{
304 				tProcessor.Process ( m_pData + dIndexes[i] );
305 			}
306 		}
307 	}
308 };
309 
310 //////////////////////////////////////////////////////////////////////////
311 
312 /// match sorting functor
313 template < typename COMP >
314 struct MatchSort_fn : public MatchSortAccessor_t
315 {
316 	CSphMatchComparatorState	m_tState;
317 
MatchSort_fnMatchSort_fn318 	explicit MatchSort_fn ( const CSphMatchComparatorState & tState )
319 		: m_tState ( tState )
320 	{}
321 
IsLessMatchSort_fn322 	bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b )
323 	{
324 		return COMP::IsLess ( *a, *b, m_tState );
325 	}
326 };
327 
328 
329 /// K-buffer (generalized double buffer) sorter
330 /// faster worst-case but slower average-case than the heap sorter
331 template < typename COMP, bool NOTIFICATIONS >
332 class CSphKbufferMatchQueue : public CSphMatchQueueTraits
333 {
334 protected:
335 	static const int	COEFF = 4;
336 
337 	CSphMatch *			m_pEnd;
338 	CSphMatch *			m_pWorst;
339 	bool				m_bFinalized;
340 
341 public:
342 	/// ctor
CSphKbufferMatchQueue(int iSize,bool bUsesAttrs)343 	CSphKbufferMatchQueue ( int iSize, bool bUsesAttrs )
344 		: CSphMatchQueueTraits ( iSize*COEFF, bUsesAttrs )
345 		, m_pEnd ( m_pData+iSize*COEFF )
346 		, m_pWorst ( NULL )
347 		, m_bFinalized ( false )
348 	{
349 		if_const ( NOTIFICATIONS )
350 			m_dJustPopped.Reserve ( m_iSize );
351 
352 		m_iSize /= COEFF;
353 	}
354 
355 	/// check if this sorter does groupby
IsGroupby() const356 	virtual bool IsGroupby () const
357 	{
358 		return false;
359 	}
360 
361 	/// add entry to the queue
Push(const CSphMatch & tEntry)362 	virtual bool Push ( const CSphMatch & tEntry )
363 	{
364 		if_const ( NOTIFICATIONS )
365 		{
366 			m_iJustPushed = 0;
367 			m_dJustPopped.Resize(0);
368 		}
369 
370 		// quick early rejection checks
371 		m_iTotal++;
372 		if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
373 			return true;
374 
375 		// quick check passed
376 		// fill the data, back to front
377 		m_bFinalized = false;
378 		m_iUsed++;
379 		m_tSchema.CloneMatch ( m_pEnd-m_iUsed, tEntry );
380 
381 		if_const ( NOTIFICATIONS )
382 			m_iJustPushed = tEntry.m_uDocID;
383 
384 		// do the initial sort once
385 		if ( m_iTotal==m_iSize )
386 		{
387 			assert ( m_iUsed==m_iSize && !m_pWorst );
388 			MatchSort_fn<COMP> tComp ( m_tState );
389 			sphSort ( m_pEnd-m_iSize, m_iSize, tComp, tComp );
390 			m_pWorst = m_pEnd-m_iSize;
391 			m_bFinalized = true;
392 			return true;
393 		}
394 
395 		// do the sort/cut when the K-buffer is full
396 		if ( m_iUsed==m_iSize*COEFF )
397 		{
398 			MatchSort_fn<COMP> tComp ( m_tState );
399 			sphSort ( m_pData, m_iUsed, tComp, tComp );
400 
401 			if_const ( NOTIFICATIONS )
402 			{
403 				for ( CSphMatch * pMatch = m_pData; pMatch < m_pEnd-m_iSize; pMatch++ )
404 					m_dJustPopped.Add ( pMatch->m_uDocID );
405 			}
406 
407 			m_iUsed = m_iSize;
408 			m_pWorst = m_pEnd-m_iSize;
409 			m_bFinalized = true;
410 		}
411 		return true;
412 	}
413 
414 	/// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)415 	virtual bool PushGrouped ( const CSphMatch &, bool )
416 	{
417 		assert ( 0 );
418 		return false;
419 	}
420 
421 	/// finalize, perform final sort/cut as needed
Finalize(ISphMatchProcessor & tProcessor,bool)422 	virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
423 	{
424 		if ( !GetLength() )
425 			return;
426 
427 		if ( !m_bFinalized )
428 		{
429 			MatchSort_fn<COMP> tComp ( m_tState );
430 			sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
431 			m_iUsed = Min ( m_iUsed, m_iSize );
432 			m_bFinalized = true;
433 		}
434 
435 		// reverse order iteration
436 		CSphMatch * pCur = m_pEnd - 1;
437 		const CSphMatch * pEnd = m_pEnd - m_iUsed;
438 		while ( pCur>=pEnd )
439 		{
440 			tProcessor.Process ( pCur-- );
441 		}
442 	}
443 
444 	/// current result set length
GetLength() const445 	virtual int GetLength () const
446 	{
447 		return Min ( m_iUsed, m_iSize );
448 	}
449 
450 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)451 	int Flatten ( CSphMatch * pTo, int iTag )
452 	{
453 		const CSphMatch * pBegin = pTo;
454 
455 		// ensure we are sorted
456 		if ( m_iUsed )
457 		{
458 			MatchSort_fn<COMP> tComp ( m_tState );
459 			sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
460 		}
461 
462 		// reverse copy
463 		for ( int i=1; i<=Min ( m_iUsed, m_iSize ); i++ )
464 		{
465 			m_tSchema.FreeStringPtrs ( pTo );
466 			Swap ( *pTo, m_pEnd[-i] );
467 			if ( iTag>=0 )
468 				pTo->m_iTag = iTag;
469 			pTo++;
470 		}
471 
472 		// clean up for the next work session
473 		m_iTotal = 0;
474 		m_iUsed = 0;
475 		m_iSize = 0;
476 		m_bFinalized = false;
477 
478 		return ( pTo-pBegin );
479 	}
480 };
481 
482 //////////////////////////////////////////////////////////////////////////
483 
484 /// collector for UPDATE statement
485 class CSphUpdateQueue : public CSphMatchQueueTraits
486 {
487 	CSphAttrUpdate		m_tWorkSet;
488 	CSphIndex *			m_pIndex;
489 	CSphString *		m_pError;
490 	CSphString *		m_pWarning;
491 	int *				m_pAffected;
492 
493 private:
DoUpdate()494 	void DoUpdate()
495 	{
496 		if ( !m_iUsed )
497 			return;
498 
499 		m_tWorkSet.m_dRowOffset.Resize ( m_iUsed );
500 		m_tWorkSet.m_dDocids.Resize ( m_iUsed );
501 		m_tWorkSet.m_dRows.Resize ( m_iUsed );
502 
503 		ARRAY_FOREACH ( i, m_tWorkSet.m_dDocids )
504 		{
505 			m_tWorkSet.m_dRowOffset[i] = 0;
506 			m_tWorkSet.m_dDocids[i] = 0;
507 			m_tWorkSet.m_dRows[i] = NULL;
508 			if ( !DOCINFO2ID ( STATIC2DOCINFO ( m_pData[i].m_pStatic ) ) ) // if static attributes were copied, so, they actually dynamic
509 			{
510 				m_tWorkSet.m_dDocids[i] = m_pData[i].m_uDocID;
511 			} else // static attributes points to the active indexes - so, no lookup, 5 times faster update.
512 			{
513 				m_tWorkSet.m_dRows[i] = m_pData[i].m_pStatic - ( sizeof(SphDocID_t) / sizeof(CSphRowitem) );
514 			}
515 		}
516 
517 		*m_pAffected += m_pIndex->UpdateAttributes ( m_tWorkSet, -1, *m_pError, *m_pWarning );
518 		m_iUsed = 0;
519 	}
520 public:
521 	/// ctor
CSphUpdateQueue(int iSize,CSphAttrUpdateEx * pUpdate,bool bIgnoreNonexistent,bool bStrict)522 	CSphUpdateQueue ( int iSize, CSphAttrUpdateEx * pUpdate, bool bIgnoreNonexistent, bool bStrict )
523 		: CSphMatchQueueTraits ( iSize, true )
524 	{
525 		m_tWorkSet.m_dRowOffset.Reserve ( m_iSize );
526 		m_tWorkSet.m_dDocids.Reserve ( m_iSize );
527 		m_tWorkSet.m_dRows.Reserve ( m_iSize );
528 
529 		m_tWorkSet.m_bIgnoreNonexistent = bIgnoreNonexistent;
530 		m_tWorkSet.m_bStrict = bStrict;
531 		m_tWorkSet.m_dTypes = pUpdate->m_pUpdate->m_dTypes;
532 		m_tWorkSet.m_dPool = pUpdate->m_pUpdate->m_dPool;
533 		m_tWorkSet.m_dAttrs.Resize ( pUpdate->m_pUpdate->m_dAttrs.GetLength() );
534 		ARRAY_FOREACH ( i, m_tWorkSet.m_dAttrs )
535 		{
536 			CSphString sTmp;
537 			sTmp = pUpdate->m_pUpdate->m_dAttrs[i];
538 			m_tWorkSet.m_dAttrs[i] = sTmp.Leak();
539 		}
540 
541 		m_pIndex = pUpdate->m_pIndex;
542 		m_pError = pUpdate->m_pError;
543 		m_pWarning = pUpdate->m_pWarning;
544 		m_pAffected = &pUpdate->m_iAffected;
545 	}
546 
547 	/// check if this sorter does groupby
IsGroupby() const548 	virtual bool IsGroupby () const
549 	{
550 		return false;
551 	}
552 
553 	/// add entry to the queue
Push(const CSphMatch & tEntry)554 	virtual bool Push ( const CSphMatch & tEntry )
555 	{
556 		m_iTotal++;
557 
558 		if ( m_iUsed==m_iSize )
559 			DoUpdate();
560 
561 		// do add
562 		m_tSchema.CloneMatch ( &m_pData[m_iUsed++], tEntry );
563 		return true;
564 	}
565 
566 	/// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)567 	virtual bool PushGrouped ( const CSphMatch &, bool )
568 	{
569 		assert ( 0 );
570 		return false;
571 	}
572 
573 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch *,int)574 	int Flatten ( CSphMatch *, int )
575 	{
576 		assert ( m_iUsed>=0 );
577 		DoUpdate();
578 		m_iTotal = 0;
579 		return 0;
580 	}
581 
Finalize(ISphMatchProcessor & tProcessor,bool)582 	virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
583 	{
584 		if ( !GetLength() )
585 			return;
586 
587 		// just evaluate in heap order
588 		CSphMatch * pCur = m_pData;
589 		const CSphMatch * pEnd = m_pData + m_iUsed;
590 		while ( pCur<pEnd )
591 		{
592 			tProcessor.Process ( pCur++ );
593 		}
594 	}
595 };
596 
597 //////////////////////////////////////////////////////////////////////////
598 
599 /// collector for DELETE statement
600 class CSphDeleteQueue : public CSphMatchQueueTraits
601 {
602 	CSphVector<SphDocID_t>* m_pValues;
603 public:
604 	/// ctor
CSphDeleteQueue(int iSize,CSphVector<SphDocID_t> * pDeletes)605 	CSphDeleteQueue ( int iSize, CSphVector<SphDocID_t> * pDeletes )
606 		: CSphMatchQueueTraits ( 1, false )
607 		, m_pValues ( pDeletes )
608 	{
609 		m_pValues->Reserve ( iSize );
610 	}
611 
612 	/// check if this sorter does groupby
IsGroupby() const613 	virtual bool IsGroupby () const
614 	{
615 		return false;
616 	}
617 
618 	/// add entry to the queue
Push(const CSphMatch & tEntry)619 	virtual bool Push ( const CSphMatch & tEntry )
620 	{
621 		m_iTotal++;
622 		m_pValues->Add ( tEntry.m_uDocID );
623 		return true;
624 	}
625 
626 	/// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)627 	virtual bool PushGrouped ( const CSphMatch &, bool )
628 	{
629 		assert ( 0 );
630 		return false;
631 	}
632 
633 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch *,int)634 	int Flatten ( CSphMatch *, int )
635 	{
636 		m_iTotal = 0;
637 		return 0;
638 	}
639 
Finalize(ISphMatchProcessor &,bool)640 	virtual void Finalize ( ISphMatchProcessor &, bool )
641 	{}
642 };
643 
644 //////////////////////////////////////////////////////////////////////////
645 // SORTING+GROUPING QUEUE
646 //////////////////////////////////////////////////////////////////////////
647 
IsCount(const CSphString & s)648 static bool IsCount ( const CSphString & s )
649 {
650 	return s=="@count" || s=="count(*)";
651 }
652 
IsGroupby(const CSphString & s)653 static bool IsGroupby ( const CSphString & s )
654 {
655 	return s=="@groupby" || s=="@distinct" || s=="groupby()" || s=="@groupbystr";
656 }
657 
IsGroupbyMagic(const CSphString & s)658 static bool IsGroupbyMagic ( const CSphString & s )
659 {
660 	return IsGroupby ( s ) || IsCount ( s );
661 }
662 
663 /// groupers
664 #define GROUPER_BEGIN(_name) \
665 	class _name : public CSphGrouper \
666 	{ \
667 	protected: \
668 		CSphAttrLocator m_tLocator; \
669 	public: \
670 		explicit _name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
671 		virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; } \
672 		virtual ESphAttr GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
673 		virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
674 		virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const \
675 		{
676 // NOLINT
677 
678 #define GROUPER_END \
679 		} \
680 	};
681 
682 
683 #define GROUPER_BEGIN_SPLIT(_name) \
684 	GROUPER_BEGIN(_name) \
685 	time_t tStamp = (time_t)uValue; \
686 	struct tm tSplit; \
687 	localtime_r ( &tStamp, &tSplit );
688 
689 
690 GROUPER_BEGIN ( CSphGrouperAttr )
691 	return uValue;
692 GROUPER_END
693 
694 
695 GROUPER_BEGIN_SPLIT ( CSphGrouperDay )
696 	return (tSplit.tm_year+1900)*10000 + (1+tSplit.tm_mon)*100 + tSplit.tm_mday;
697 GROUPER_END
698 
699 
700 GROUPER_BEGIN_SPLIT ( CSphGrouperWeek )
701 	int iPrevSunday = (1+tSplit.tm_yday) - tSplit.tm_wday; // prev Sunday day of year, base 1
702 	int iYear = tSplit.tm_year+1900;
703 	if ( iPrevSunday<=0 ) // check if we crossed year boundary
704 	{
705 		// adjust day and year
706 		iPrevSunday += 365;
707 		iYear--;
708 
709 		// adjust for leap years
710 		if ( iYear%4==0 && ( iYear%100!=0 || iYear%400==0 ) )
711 			iPrevSunday++;
712 	}
713 	return iYear*1000 + iPrevSunday;
714 GROUPER_END
715 
716 
717 GROUPER_BEGIN_SPLIT ( CSphGrouperMonth )
718 	return (tSplit.tm_year+1900)*100 + (1+tSplit.tm_mon);
719 GROUPER_END
720 
721 
722 GROUPER_BEGIN_SPLIT ( CSphGrouperYear )
723 	return (tSplit.tm_year+1900);
724 GROUPER_END
725 
726 template <class PRED>
727 class CSphGrouperString : public CSphGrouperAttr, public PRED
728 {
729 protected:
730 	const BYTE * m_pStringBase;
731 
732 public:
733 
CSphGrouperString(const CSphAttrLocator & tLoc)734 	explicit CSphGrouperString ( const CSphAttrLocator & tLoc )
735 		: CSphGrouperAttr ( tLoc )
736 		, m_pStringBase ( NULL )
737 	{
738 	}
739 
GetResultType() const740 	virtual ESphAttr GetResultType () const
741 	{
742 		return SPH_ATTR_BIGINT;
743 	}
744 
KeyFromValue(SphAttr_t uValue) const745 	virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const
746 	{
747 		if ( !m_pStringBase || !uValue )
748 			return 0;
749 
750 		const BYTE * pStr = NULL;
751 		int iLen = sphUnpackStr ( m_pStringBase+uValue, &pStr );
752 
753 		if ( !pStr || !iLen )
754 			return 0;
755 
756 		return PRED::Hash ( pStr, iLen );
757 	}
758 
SetStringPool(const BYTE * pStrings)759 	virtual void SetStringPool ( const BYTE * pStrings )
760 	{
761 		m_pStringBase = pStrings;
762 	}
763 
CanMulti() const764 	virtual bool CanMulti () const { return false; }
765 };
766 
767 
768 class BinaryHash_fn
769 {
770 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const771 	uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
772 	{
773 		assert ( pStr && iLen );
774 		return sphFNV64 ( pStr, iLen, uPrev );
775 	}
776 };
777 
778 
779 template < typename T >
FormatInt(char sBuf[32],T v)780 inline static char * FormatInt ( char sBuf[32], T v )
781 {
782 	if_const ( sizeof(T)==4 && v==INT_MIN )
783 		return strncpy ( sBuf, "-2147483648", 32 );
784 	if_const ( sizeof(T)==8 && v==LLONG_MIN )
785 		return strncpy ( sBuf, "-9223372036854775808", 32 );
786 
787 	bool s = ( v<0 );
788 	if ( s )
789 		v = -v;
790 
791 	char * p = sBuf+31;
792 	*p = 0;
793 	do
794 	{
795 		*--p = '0' + char ( v % 10 );
796 		v /= 10;
797 	} while ( v );
798 	if ( s )
799 		*--p = '-';
800 	return p;
801 }
802 
803 
804 /// lookup JSON key, group by looked up value (used in CSphKBufferJsonGroupSorter)
805 class CSphGrouperJsonField : public CSphGrouper
806 {
807 protected:
808 	CSphAttrLocator m_tLocator;
809 
810 public:
811 	ISphExpr * m_pExpr;
812 	const BYTE * m_pStrings;
813 
CSphGrouperJsonField(const CSphAttrLocator & tLoc,ISphExpr * pExpr)814 	explicit CSphGrouperJsonField ( const CSphAttrLocator & tLoc, ISphExpr * pExpr )
815 		: m_tLocator ( tLoc )
816 		, m_pExpr ( pExpr )
817 		, m_pStrings ( NULL )
818 	{}
819 
~CSphGrouperJsonField()820 	virtual ~CSphGrouperJsonField ()
821 	{
822 		SafeRelease ( m_pExpr );
823 	}
824 
SetStringPool(const BYTE * pStrings)825 	virtual void SetStringPool ( const BYTE * pStrings )
826 	{
827 		m_pStrings = pStrings;
828 		if ( m_pExpr )
829 			m_pExpr->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
830 	}
831 
GetLocator(CSphAttrLocator & tOut) const832 	virtual void GetLocator ( CSphAttrLocator & tOut ) const
833 	{
834 		tOut = m_tLocator;
835 	}
836 
GetResultType() const837 	virtual ESphAttr GetResultType () const
838 	{
839 		return SPH_ATTR_BIGINT;
840 	}
841 
KeyFromMatch(const CSphMatch & tMatch) const842 	virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
843 	{
844 		if ( !m_pExpr )
845 			return SphGroupKey_t();
846 		return m_pExpr->Int64Eval ( tMatch );
847 	}
848 
KeyFromValue(SphAttr_t) const849 	virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
850 };
851 
852 
853 template <class PRED>
854 class CSphGrouperMulti : public CSphGrouper, public PRED
855 {
856 public:
CSphGrouperMulti(const CSphVector<CSphAttrLocator> & dLocators,const CSphVector<ESphAttr> & dAttrTypes,const CSphVector<ISphExpr * > & dJsonKeys)857 	CSphGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes, const CSphVector<ISphExpr *> & dJsonKeys )
858 		: m_dLocators ( dLocators )
859 		, m_dAttrTypes ( dAttrTypes )
860 		, m_dJsonKeys ( dJsonKeys )
861 	{
862 		assert ( m_dLocators.GetLength()>1 );
863 		assert ( m_dLocators.GetLength()==m_dAttrTypes.GetLength() && m_dLocators.GetLength()==dJsonKeys.GetLength() );
864 	}
865 
~CSphGrouperMulti()866 	virtual ~CSphGrouperMulti ()
867 	{
868 		ARRAY_FOREACH ( i, m_dJsonKeys )
869 			SafeDelete ( m_dJsonKeys[i] );
870 	}
871 
KeyFromMatch(const CSphMatch & tMatch) const872 	virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
873 	{
874 		SphGroupKey_t tKey = SPH_FNV64_SEED;
875 
876 		for ( int i=0; i<m_dLocators.GetLength(); i++ )
877 		{
878 			SphAttr_t tAttr = tMatch.GetAttr ( m_dLocators[i] );
879 			if ( m_dAttrTypes[i]==SPH_ATTR_STRING )
880 			{
881 				assert ( m_pStringBase );
882 
883 				const BYTE * pStr = NULL;
884 				int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
885 
886 				if ( !pStr || !iLen )
887 					continue;
888 
889 				tKey = PRED::Hash ( pStr, iLen, tKey );
890 
891 			} else if ( m_dAttrTypes[i]==SPH_ATTR_JSON )
892 			{
893 				assert ( m_pStringBase );
894 
895 				const BYTE * pStr = NULL;
896 				int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
897 
898 				if ( !pStr || !iLen )
899 					continue;
900 
901 				uint64_t uValue = m_dJsonKeys[i]->Int64Eval ( tMatch );
902 				const BYTE * pValue = m_pStringBase + ( uValue & 0xffffffff );
903 				ESphJsonType eRes = (ESphJsonType)( uValue >> 32 );
904 
905 				int i32Val;
906 				int64_t i64Val;
907 				double fVal;
908 				switch ( eRes )
909 				{
910 				case JSON_STRING:
911 					iLen = sphJsonUnpackInt ( &pValue );
912 					tKey = sphFNV64 ( pValue, iLen, tKey );
913 					break;
914 				case JSON_INT32:
915 					i32Val = sphJsonLoadInt ( &pValue );
916 					tKey = sphFNV64 ( &i32Val, sizeof(i32Val), tKey );
917 					break;
918 				case JSON_INT64:
919 					i64Val = sphJsonLoadBigint ( &pValue );
920 					tKey = sphFNV64 ( &i64Val, sizeof(i64Val), tKey );
921 					break;
922 				case JSON_DOUBLE:
923 					fVal = sphQW2D ( sphJsonLoadBigint ( &pValue ) );
924 					tKey = sphFNV64 ( &fVal, sizeof(fVal), tKey );
925 					break;
926 				default:
927 					break;
928 				}
929 
930 			} else
931 				tKey = sphFNV64 ( &tAttr, sizeof(SphAttr_t), tKey );
932 		}
933 
934 		return tKey;
935 	}
936 
SetStringPool(const BYTE * pStrings)937 	virtual void SetStringPool ( const BYTE * pStrings )
938 	{
939 		m_pStringBase = pStrings;
940 
941 		ARRAY_FOREACH ( i, m_dJsonKeys )
942 		{
943 			if ( m_dJsonKeys[i] )
944 				m_dJsonKeys[i]->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
945 		}
946 	}
947 
KeyFromValue(SphAttr_t) const948 	virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
GetLocator(CSphAttrLocator &) const949 	virtual void GetLocator ( CSphAttrLocator & ) const { assert(0); }
GetResultType() const950 	virtual ESphAttr GetResultType() const { return SPH_ATTR_BIGINT; }
CanMulti() const951 	virtual bool CanMulti () const { return false; }
952 
953 private:
954 	CSphVector<CSphAttrLocator>	m_dLocators;
955 	CSphVector<ESphAttr>		m_dAttrTypes;
956 	const BYTE *				m_pStringBase;
957 	CSphVector<ISphExpr *>		m_dJsonKeys;
958 };
959 
960 //////////////////////////////////////////////////////////////////////////
961 
962 /// simple fixed-size hash
963 /// doesn't keep the order
964 template < typename T, typename KEY, typename HASHFUNC >
965 class CSphFixedHash : ISphNoncopyable
966 {
967 protected:
968 	static const int			HASH_LIST_END	= -1;
969 	static const int			HASH_DELETED	= -2;
970 
971 	struct HashEntry_t
972 	{
973 		KEY		m_tKey;
974 		T		m_tValue;
975 		int		m_iNext;
976 	};
977 
978 protected:
979 	CSphVector<HashEntry_t>		m_dEntries;		///< key-value pairs storage pool
980 	CSphVector<int>				m_dHash;		///< hash into m_dEntries pool
981 
982 	int							m_iFree;		///< free pairs count
983 	CSphVector<int>				m_dFree;		///< free pair indexes
984 
985 public:
986 	/// ctor
CSphFixedHash(int iLength)987 	explicit CSphFixedHash ( int iLength )
988 	{
989 		int iBuckets = ( 2 << sphLog2 ( iLength-1 ) ); // less than 50% bucket usage guaranteed
990 		assert ( iLength>0 );
991 		assert ( iLength<=iBuckets );
992 
993 		m_dEntries.Resize ( iLength );
994 		m_dHash.Resize ( iBuckets );
995 		m_dFree.Resize ( iLength );
996 
997 		Reset ();
998 	}
999 
1000 	/// cleanup
Reset()1001 	void Reset ()
1002 	{
1003 		ARRAY_FOREACH ( i, m_dEntries )
1004 			m_dEntries[i].m_iNext = HASH_DELETED;
1005 
1006 		ARRAY_FOREACH ( i, m_dHash )
1007 			m_dHash[i] = HASH_LIST_END;
1008 
1009 		m_iFree = m_dFree.GetLength();
1010 		ARRAY_FOREACH ( i, m_dFree )
1011 			m_dFree[i] = i;
1012 	}
1013 
1014 	/// add new entry
1015 	/// returns NULL on success
1016 	/// returns pointer to value if already hashed, or replace it with new one, if insisted.
Add(const T & tValue,const KEY & tKey,bool bReplace=false)1017 	T * Add ( const T & tValue, const KEY & tKey, bool bReplace=false )
1018 	{
1019 		assert ( m_iFree>0 && "hash overflow" );
1020 
1021 		// check if it's already hashed
1022 		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1023 		int iPrev = -1, iEntry;
1024 
1025 		for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
1026 			if ( m_dEntries[iEntry].m_tKey==tKey )
1027 			{
1028 				if ( bReplace )
1029 					m_dEntries[iEntry].m_tValue = tValue;
1030 				return &m_dEntries[iEntry].m_tValue;
1031 			}
1032 		assert ( iEntry!=HASH_DELETED );
1033 
1034 		// if it's not, do add
1035 		int iNew = m_dFree [ --m_iFree ];
1036 
1037 		HashEntry_t & tNew = m_dEntries[iNew];
1038 		assert ( tNew.m_iNext==HASH_DELETED );
1039 
1040 		tNew.m_tKey = tKey;
1041 		tNew.m_tValue = tValue;
1042 		tNew.m_iNext = HASH_LIST_END;
1043 
1044 		if ( iPrev>=0 )
1045 		{
1046 			assert ( m_dEntries[iPrev].m_iNext==HASH_LIST_END );
1047 			m_dEntries[iPrev].m_iNext = iNew;
1048 		} else
1049 		{
1050 			assert ( m_dHash[uHash]==HASH_LIST_END );
1051 			m_dHash[uHash] = iNew;
1052 		}
1053 		return NULL;
1054 	}
1055 
1056 	/// remove entry from hash
Remove(const KEY & tKey)1057 	void Remove ( const KEY & tKey )
1058 	{
1059 		// check if it's already hashed
1060 		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1061 		int iPrev = -1, iEntry;
1062 
1063 		for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
1064 			if ( m_dEntries[iEntry].m_tKey==tKey )
1065 		{
1066 			// found, remove it
1067 			assert ( m_dEntries[iEntry].m_iNext!=HASH_DELETED );
1068 			if ( iPrev>=0 )
1069 				m_dEntries[iPrev].m_iNext = m_dEntries[iEntry].m_iNext;
1070 			else
1071 				m_dHash[uHash] = m_dEntries[iEntry].m_iNext;
1072 
1073 #ifndef NDEBUG
1074 			m_dEntries[iEntry].m_iNext = HASH_DELETED;
1075 #endif
1076 
1077 			m_dFree [ m_iFree++ ] = iEntry;
1078 			return;
1079 		}
1080 		assert ( iEntry!=HASH_DELETED );
1081 	}
1082 
1083 	/// get value pointer by key
operator ()(const KEY & tKey) const1084 	T * operator () ( const KEY & tKey ) const
1085 	{
1086 		DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1087 		int iEntry;
1088 
1089 		for ( iEntry=m_dHash[uHash]; iEntry>=0; iEntry=m_dEntries[iEntry].m_iNext )
1090 			if ( m_dEntries[iEntry].m_tKey==tKey )
1091 				return (T*)&m_dEntries[iEntry].m_tValue;
1092 
1093 		assert ( iEntry!=HASH_DELETED );
1094 		return NULL;
1095 	}
1096 };
1097 
1098 
1099 /////////////////////////////////////////////////////////////////////////////
1100 
1101 /// (group,attrvalue) pair
1102 struct SphGroupedValue_t
1103 {
1104 public:
1105 	SphGroupKey_t	m_uGroup;
1106 	SphAttr_t		m_uValue;
1107 	int				m_iCount;
1108 
1109 public:
SphGroupedValue_tSphGroupedValue_t1110 	SphGroupedValue_t ()
1111 	{}
1112 
SphGroupedValue_tSphGroupedValue_t1113 	SphGroupedValue_t ( SphGroupKey_t uGroup, SphAttr_t uValue, int iCount )
1114 		: m_uGroup ( uGroup )
1115 		, m_uValue ( uValue )
1116 		, m_iCount ( iCount )
1117 	{}
1118 
operator <SphGroupedValue_t1119 	inline bool operator < ( const SphGroupedValue_t & rhs ) const
1120 	{
1121 		if ( m_uGroup!=rhs.m_uGroup ) return m_uGroup<rhs.m_uGroup;
1122 		if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
1123 		return m_iCount>rhs.m_iCount;
1124 	}
1125 
operator ==SphGroupedValue_t1126 	inline bool operator == ( const SphGroupedValue_t & rhs ) const
1127 	{
1128 		return m_uGroup==rhs.m_uGroup && m_uValue==rhs.m_uValue;
1129 	}
1130 };
1131 
1132 /// same as SphGroupedValue_t but without group
1133 struct SphUngroupedValue_t
1134 {
1135 public:
1136 	SphAttr_t		m_uValue;
1137 	int				m_iCount;
1138 
1139 public:
SphUngroupedValue_tSphUngroupedValue_t1140 	SphUngroupedValue_t ()
1141 	{}
1142 
SphUngroupedValue_tSphUngroupedValue_t1143 	SphUngroupedValue_t ( SphAttr_t uValue, int iCount )
1144 		: m_uValue ( uValue )
1145 		, m_iCount ( iCount )
1146 	{}
1147 
operator <SphUngroupedValue_t1148 	inline bool operator < ( const SphUngroupedValue_t & rhs ) const
1149 	{
1150 		if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
1151 		return m_iCount>rhs.m_iCount;
1152 	}
1153 
operator ==SphUngroupedValue_t1154 	inline bool operator == ( const SphUngroupedValue_t & rhs ) const
1155 	{
1156 		return m_uValue==rhs.m_uValue;
1157 	}
1158 };
1159 
1160 
1161 /// unique values counter
1162 /// used for COUNT(DISTINCT xxx) GROUP BY yyy queries
1163 class CSphUniqounter : public CSphVector<SphGroupedValue_t>
1164 {
1165 public:
1166 #ifndef NDEBUG
CSphUniqounter()1167 					CSphUniqounter () : m_iCountPos ( 0 ), m_bSorted ( true ) { Reserve ( 16384 ); }
Add(const SphGroupedValue_t & tValue)1168 	void			Add ( const SphGroupedValue_t & tValue )	{ CSphVector<SphGroupedValue_t>::Add ( tValue ); m_bSorted = false; }
Sort()1169 	void			Sort ()										{ CSphVector<SphGroupedValue_t>::Sort (); m_bSorted = true; }
1170 
1171 #else
1172 					CSphUniqounter () : m_iCountPos ( 0 ) {}
1173 #endif
1174 
1175 public:
1176 	int				CountStart ( SphGroupKey_t * pOutGroup );	///< starting counting distinct values, returns count and group key (0 if empty)
1177 	int				CountNext ( SphGroupKey_t * pOutGroup );	///< continues counting distinct values, returns count and group key (0 if done)
1178 	void			Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups );
1179 
1180 protected:
1181 	int				m_iCountPos;
1182 
1183 #ifndef NDEBUG
1184 	bool			m_bSorted;
1185 #endif
1186 };
1187 
1188 
CountStart(SphGroupKey_t * pOutGroup)1189 int CSphUniqounter::CountStart ( SphGroupKey_t * pOutGroup )
1190 {
1191 	m_iCountPos = 0;
1192 	return CountNext ( pOutGroup );
1193 }
1194 
1195 
CountNext(SphGroupKey_t * pOutGroup)1196 int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
1197 {
1198 	assert ( m_bSorted );
1199 	if ( m_iCountPos>=m_iLength )
1200 		return 0;
1201 
1202 	SphGroupKey_t uGroup = m_pData[m_iCountPos].m_uGroup;
1203 	SphAttr_t uValue = m_pData[m_iCountPos].m_uValue;
1204 	int iCount = m_pData[m_iCountPos].m_iCount;
1205 	*pOutGroup = uGroup;
1206 
1207 	while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
1208 	{
1209 		if ( m_pData[m_iCountPos].m_uValue!=uValue )
1210 			iCount += m_pData[m_iCountPos].m_iCount;
1211 		uValue = m_pData[m_iCountPos].m_uValue;
1212 		m_iCountPos++;
1213 	}
1214 	return iCount;
1215 }
1216 
1217 
Compact(SphGroupKey_t * pRemoveGroups,int iRemoveGroups)1218 void CSphUniqounter::Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups )
1219 {
1220 	assert ( m_bSorted );
1221 	if ( !m_iLength )
1222 		return;
1223 
1224 	sphSort ( pRemoveGroups, iRemoveGroups );
1225 
1226 	SphGroupedValue_t * pSrc = m_pData;
1227 	SphGroupedValue_t * pDst = m_pData;
1228 	SphGroupedValue_t * pEnd = m_pData + m_iLength;
1229 
1230 	// skip remove-groups which are not in my list
1231 	while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
1232 	{
1233 		pRemoveGroups++;
1234 		iRemoveGroups--;
1235 	}
1236 
1237 	for ( ; pSrc<pEnd; pSrc++ )
1238 	{
1239 		// check if this entry needs to be removed
1240 		while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
1241 		{
1242 			pRemoveGroups++;
1243 			iRemoveGroups--;
1244 		}
1245 		if ( iRemoveGroups && pSrc->m_uGroup==*pRemoveGroups )
1246 			continue;
1247 
1248 		// check if it's a dupe
1249 		if ( pDst>m_pData && pDst[-1]==pSrc[0] )
1250 			continue;
1251 
1252 		*pDst++ = *pSrc;
1253 	}
1254 
1255 	assert ( pDst-m_pData<=m_iLength );
1256 	m_iLength = pDst-m_pData;
1257 }
1258 
1259 /////////////////////////////////////////////////////////////////////////////
1260 
1261 /// attribute magic
1262 enum
1263 {
1264 	SPH_VATTR_ID			= -1,	///< tells match sorter to use doc id
1265 	SPH_VATTR_RELEVANCE		= -2,	///< tells match sorter to use match weight
1266 	SPH_VATTR_FLOAT			= 10000	///< tells match sorter to compare floats
1267 };
1268 
1269 
1270 /// match comparator interface from group-by sorter point of view
1271 struct ISphMatchComparator
1272 {
~ISphMatchComparatorISphMatchComparator1273 	virtual ~ISphMatchComparator () {}
1274 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & tState ) const = 0;
1275 };
1276 
1277 
1278 /// additional group-by sorter settings
1279 struct CSphGroupSorterSettings
1280 {
1281 	CSphAttrLocator		m_tLocGroupby;		///< locator for @groupby
1282 	CSphAttrLocator		m_tLocCount;		///< locator for @count
1283 	CSphAttrLocator		m_tLocDistinct;		///< locator for @distinct
1284 	CSphAttrLocator		m_tDistinctLoc;		///< locator for attribute to compute count(distinct) for
1285 	bool				m_bDistinct;		///< whether we need distinct
1286 	bool				m_bMVA;				///< whether we're grouping by MVA attribute
1287 	bool				m_bMva64;
1288 	CSphGrouper *		m_pGrouper;			///< group key calculator
1289 	bool				m_bImplicit;		///< for queries with aggregate functions but without group by clause
1290 	const ISphFilter *	m_pAggrFilterTrait; ///< aggregate filter that got owned by grouper
1291 	bool				m_bJson;			///< whether we're grouping by Json attribute
1292 	CSphAttrLocator		m_tLocGroupbyStr;	///< locator for @groupbystr
1293 
CSphGroupSorterSettingsCSphGroupSorterSettings1294 	CSphGroupSorterSettings ()
1295 		: m_bDistinct ( false )
1296 		, m_bMVA ( false )
1297 		, m_bMva64 ( false )
1298 		, m_pGrouper ( NULL )
1299 		, m_bImplicit ( false )
1300 		, m_pAggrFilterTrait ( NULL )
1301 		, m_bJson ( false )
1302 	{}
1303 };
1304 
1305 
1306 /// aggregate function interface
1307 class IAggrFunc
1308 {
1309 public:
~IAggrFunc()1310 	virtual			~IAggrFunc() {}
Ungroup(CSphMatch *)1311 	virtual void	Ungroup ( CSphMatch * ) {}
1312 	virtual void	Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped ) = 0;
Finalize(CSphMatch *)1313 	virtual void	Finalize ( CSphMatch * ) {}
1314 };
1315 
1316 
1317 /// aggregate traits for different attribute types
1318 template < typename T >
1319 class IAggrFuncTraits : public IAggrFunc
1320 {
1321 public:
IAggrFuncTraits(const CSphAttrLocator & tLocator)1322 	explicit		IAggrFuncTraits ( const CSphAttrLocator & tLocator ) : m_tLocator ( tLocator ) {}
1323 	inline T		GetValue ( const CSphMatch * pRow );
1324 	inline void		SetValue ( CSphMatch * pRow, T val );
1325 
1326 protected:
1327 	CSphAttrLocator	m_tLocator;
1328 };
1329 
1330 template<>
GetValue(const CSphMatch * pRow)1331 DWORD IAggrFuncTraits<DWORD>::GetValue ( const CSphMatch * pRow )
1332 {
1333 	return (DWORD)pRow->GetAttr ( m_tLocator );
1334 }
1335 
1336 template<>
SetValue(CSphMatch * pRow,DWORD val)1337 void IAggrFuncTraits<DWORD>::SetValue ( CSphMatch * pRow, DWORD val )
1338 {
1339 	pRow->SetAttr ( m_tLocator, val );
1340 }
1341 
1342 template<>
GetValue(const CSphMatch * pRow)1343 int64_t IAggrFuncTraits<int64_t>::GetValue ( const CSphMatch * pRow )
1344 {
1345 	return pRow->GetAttr ( m_tLocator );
1346 }
1347 
1348 template<>
SetValue(CSphMatch * pRow,int64_t val)1349 void IAggrFuncTraits<int64_t>::SetValue ( CSphMatch * pRow, int64_t val )
1350 {
1351 	pRow->SetAttr ( m_tLocator, val );
1352 }
1353 
1354 template<>
GetValue(const CSphMatch * pRow)1355 float IAggrFuncTraits<float>::GetValue ( const CSphMatch * pRow )
1356 {
1357 	return pRow->GetAttrFloat ( m_tLocator );
1358 }
1359 
1360 template<>
SetValue(CSphMatch * pRow,float val)1361 void IAggrFuncTraits<float>::SetValue ( CSphMatch * pRow, float val )
1362 {
1363 	pRow->SetAttrFloat ( m_tLocator, val );
1364 }
1365 
1366 
1367 
1368 /// SUM() implementation
1369 template < typename T >
1370 class AggrSum_t : public IAggrFuncTraits<T>
1371 {
1372 public:
AggrSum_t(const CSphAttrLocator & tLoc)1373 	explicit AggrSum_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1374 	{}
1375 
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1376 	virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1377 	{
1378 		this->SetValue ( pDst, this->GetValue(pDst)+this->GetValue(pSrc) );
1379 	}
1380 };
1381 
1382 
1383 /// AVG() implementation
1384 template < typename T >
1385 class AggrAvg_t : public IAggrFuncTraits<T>
1386 {
1387 protected:
1388 	CSphAttrLocator m_tCountLoc;
1389 public:
AggrAvg_t(const CSphAttrLocator & tLoc,const CSphAttrLocator & tCountLoc)1390 	AggrAvg_t ( const CSphAttrLocator & tLoc, const CSphAttrLocator & tCountLoc ) : IAggrFuncTraits<T> ( tLoc ), m_tCountLoc ( tCountLoc )
1391 	{}
1392 
Ungroup(CSphMatch * pDst)1393 	virtual void Ungroup ( CSphMatch * pDst )
1394 	{
1395 		this->SetValue ( pDst, T ( this->GetValue ( pDst ) * pDst->GetAttr ( m_tCountLoc ) ) );
1396 	}
1397 
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool bGrouped)1398 	virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped )
1399 	{
1400 		if ( bGrouped )
1401 			this->SetValue ( pDst, T ( this->GetValue ( pDst ) + this->GetValue ( pSrc ) * pSrc->GetAttr ( m_tCountLoc ) ) );
1402 		else
1403 			this->SetValue ( pDst, this->GetValue ( pDst ) + this->GetValue ( pSrc ) );
1404 	}
1405 
Finalize(CSphMatch * pDst)1406 	virtual void Finalize ( CSphMatch * pDst )
1407 	{
1408 		this->SetValue ( pDst, T ( this->GetValue ( pDst ) / pDst->GetAttr ( m_tCountLoc ) ) );
1409 	}
1410 };
1411 
1412 
1413 /// MAX() implementation
1414 template < typename T >
1415 class AggrMax_t : public IAggrFuncTraits<T>
1416 {
1417 public:
AggrMax_t(const CSphAttrLocator & tLoc)1418 	explicit AggrMax_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1419 	{}
1420 
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1421 	virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1422 	{
1423 		this->SetValue ( pDst, Max ( this->GetValue(pDst), this->GetValue(pSrc) ) );
1424 	}
1425 };
1426 
1427 
1428 /// MIN() implementation
1429 template < typename T >
1430 class AggrMin_t : public IAggrFuncTraits<T>
1431 {
1432 public:
AggrMin_t(const CSphAttrLocator & tLoc)1433 	explicit AggrMin_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1434 	{}
1435 
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1436 	virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1437 	{
1438 		this->SetValue ( pDst, Min ( this->GetValue(pDst), this->GetValue(pSrc) ) );
1439 	}
1440 };
1441 
1442 
1443 /// GROUP_CONCAT() implementation
1444 class AggrConcat_t : public IAggrFunc
1445 {
1446 protected:
1447 	CSphAttrLocator	m_tLoc;
1448 
1449 public:
AggrConcat_t(const CSphColumnInfo & tCol)1450 	explicit AggrConcat_t ( const CSphColumnInfo & tCol )
1451 		: m_tLoc ( tCol.m_tLocator )
1452 	{}
1453 
Ungroup(CSphMatch *)1454 	void Ungroup ( CSphMatch * ) {}
Finalize(CSphMatch *)1455 	void Finalize ( CSphMatch * ) {}
1456 
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1457 	void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1458 	{
1459 		const char * sDst = (const char*) pDst->GetAttr(m_tLoc);
1460 		const char * sSrc = (const char*) pSrc->GetAttr(m_tLoc);
1461 		assert ( !sDst || *sDst ); // ensure the string is either NULL, or has data
1462 		assert ( !sSrc || *sSrc );
1463 
1464 		// empty source? kinda weird, but done!
1465 		if ( !sSrc )
1466 			return;
1467 
1468 		// empty destination? just clone the source
1469 		if ( !sDst )
1470 		{
1471 			if ( sSrc )
1472 				pDst->SetAttr ( m_tLoc, (SphAttr_t)strdup(sSrc) );
1473 			return;
1474 		}
1475 
1476 		// both source and destination present? append source to destination
1477 		// note that it gotta be manual copying here, as SetSprintf (currently) comes with a 1K limit
1478 		assert ( sDst && sSrc );
1479 		int iSrc = strlen ( sSrc );
1480 		int iDst = strlen ( sDst );
1481 		char * sNew = new char [ iSrc+iDst+2 ]; // OPTIMIZE? careful pre-reserve and/or realloc would be even faster
1482 		memcpy ( sNew, sDst, iDst );
1483 		sNew [ iDst ] = ',';
1484 		memcpy ( sNew+iDst+1, sSrc, iSrc );
1485 		sNew [ iSrc+iDst+1 ] = '\0';
1486 		pDst->SetAttr ( m_tLoc, (SphAttr_t)sNew );
1487 		SafeDeleteArray ( sDst );
1488 	}
1489 };
1490 
1491 
1492 /// group sorting functor
1493 template < typename COMPGROUP >
1494 struct GroupSorter_fn : public CSphMatchComparatorState, public MatchSortAccessor_t
1495 {
IsLessGroupSorter_fn1496 	bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b ) const
1497 	{
1498 		return COMPGROUP::IsLess ( *b, *a, *this );
1499 	}
1500 };
1501 
1502 
1503 struct MatchCloner_t
1504 {
1505 	CSphFixedVector<CSphRowitem>	m_dRowBuf;
1506 	CSphVector<CSphAttrLocator>		m_dAttrsRaw;
1507 	CSphVector<CSphAttrLocator>		m_dAttrsPtr;
1508 	const CSphRsetSchema *			m_pSchema;
1509 
MatchCloner_tMatchCloner_t1510 	MatchCloner_t ()
1511 		: m_dRowBuf ( 0 )
1512 		, m_pSchema ( NULL )
1513 	{ }
1514 
SetSchemaMatchCloner_t1515 	void SetSchema ( const CSphRsetSchema * pSchema )
1516 	{
1517 		m_pSchema = pSchema;
1518 		m_dRowBuf.Reset ( m_pSchema->GetDynamicSize() );
1519 	}
1520 
CombineMatchCloner_t1521 	void Combine ( CSphMatch * pDst, const CSphMatch * pSrc, const CSphMatch * pGroup )
1522 	{
1523 		assert ( m_pSchema && pDst && pSrc && pGroup );
1524 		assert ( pDst!=pGroup );
1525 		m_pSchema->CloneMatch ( pDst, *pSrc );
1526 
1527 		ARRAY_FOREACH ( i, m_dAttrsRaw )
1528 		{
1529 			pDst->SetAttr ( m_dAttrsRaw[i], pGroup->GetAttr ( m_dAttrsRaw[i] ) );
1530 		}
1531 
1532 		ARRAY_FOREACH ( i, m_dAttrsPtr )
1533 		{
1534 			assert ( !pDst->GetAttr ( m_dAttrsPtr[i] ) );
1535 			const char * sSrc = (const char*)pGroup->GetAttr ( m_dAttrsPtr[i] );
1536 			const char * sDst = NULL;
1537 			if ( sSrc && *sSrc )
1538 				sDst = strdup(sSrc);
1539 
1540 			pDst->SetAttr ( m_dAttrsPtr[i], (SphAttr_t)sDst );
1541 		}
1542 	}
1543 
CloneMatchCloner_t1544 	void Clone ( CSphMatch * pOld, const CSphMatch * pNew )
1545 	{
1546 		assert ( m_pSchema && pOld && pNew );
1547 		if ( pOld->m_pDynamic==NULL ) // no old match has no data to copy, just a fresh but old match
1548 		{
1549 			m_pSchema->CloneMatch ( pOld, *pNew );
1550 			return;
1551 		}
1552 
1553 		memcpy ( m_dRowBuf.Begin(), pOld->m_pDynamic, sizeof(m_dRowBuf[0]) * m_dRowBuf.GetLength() );
1554 
1555 		// don't let cloning operation to free old string data
1556 		// as it will be copied back
1557 		ARRAY_FOREACH ( i, m_dAttrsPtr )
1558 			pOld->SetAttr ( m_dAttrsPtr[i], 0 );
1559 
1560 		m_pSchema->CloneMatch ( pOld, *pNew );
1561 
1562 		ARRAY_FOREACH ( i, m_dAttrsRaw )
1563 			pOld->SetAttr ( m_dAttrsRaw[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsRaw[i] ) );
1564 		ARRAY_FOREACH ( i, m_dAttrsPtr )
1565 			pOld->SetAttr ( m_dAttrsPtr[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsPtr[i] ) );
1566 	}
1567 };
1568 
1569 
ExtractAggregates(const CSphRsetSchema & tSchema,const CSphAttrLocator & tLocCount,const ESphSortKeyPart * m_pGroupSorterKeyparts,const CSphAttrLocator * m_pGroupSorterLocator,CSphVector<IAggrFunc * > & dAggregates,CSphVector<IAggrFunc * > & dAvgs,MatchCloner_t & tCloner)1570 static void ExtractAggregates ( const CSphRsetSchema & tSchema, const CSphAttrLocator & tLocCount, const ESphSortKeyPart * m_pGroupSorterKeyparts, const CSphAttrLocator * m_pGroupSorterLocator,
1571 								CSphVector<IAggrFunc *> & dAggregates, CSphVector<IAggrFunc *> & dAvgs, MatchCloner_t & tCloner )
1572 {
1573 	for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
1574 	{
1575 		const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
1576 		bool bMagicAggr = IsGroupbyMagic ( tAttr.m_sName ) || sphIsSortStringInternal ( tAttr.m_sName.cstr() ); // magic legacy aggregates
1577 
1578 		if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE || bMagicAggr )
1579 			continue;
1580 
1581 		switch ( tAttr.m_eAggrFunc )
1582 		{
1583 		case SPH_AGGR_SUM:
1584 			switch ( tAttr.m_eAttrType )
1585 			{
1586 			case SPH_ATTR_INTEGER:	dAggregates.Add ( new AggrSum_t<DWORD> ( tAttr.m_tLocator ) ); break;
1587 			case SPH_ATTR_BIGINT:	dAggregates.Add ( new AggrSum_t<int64_t> ( tAttr.m_tLocator ) ); break;
1588 			case SPH_ATTR_FLOAT:	dAggregates.Add ( new AggrSum_t<float> ( tAttr.m_tLocator ) ); break;
1589 			default:				assert ( 0 && "internal error: unhandled aggregate type" ); break;
1590 			}
1591 			break;
1592 
1593 		case SPH_AGGR_AVG:
1594 			switch ( tAttr.m_eAttrType )
1595 			{
1596 			case SPH_ATTR_INTEGER:	dAggregates.Add ( new AggrAvg_t<DWORD> ( tAttr.m_tLocator, tLocCount ) ); break;
1597 			case SPH_ATTR_BIGINT:	dAggregates.Add ( new AggrAvg_t<int64_t> ( tAttr.m_tLocator, tLocCount ) ); break;
1598 			case SPH_ATTR_FLOAT:	dAggregates.Add ( new AggrAvg_t<float> ( tAttr.m_tLocator, tLocCount ) ); break;
1599 			default:				assert ( 0 && "internal error: unhandled aggregate type" ); break;
1600 			}
1601 			// store avg to calculate these attributes prior to groups sort
1602 			for ( int iState=0; iState<CSphMatchComparatorState::MAX_ATTRS; iState++ )
1603 			{
1604 				ESphSortKeyPart eKeypart = m_pGroupSorterKeyparts[iState];
1605 				CSphAttrLocator tLoc = m_pGroupSorterLocator[iState];
1606 				if ( ( eKeypart==SPH_KEYPART_INT || eKeypart==SPH_KEYPART_FLOAT )
1607 					&& tLoc.m_bDynamic==tAttr.m_tLocator.m_bDynamic && tLoc.m_iBitOffset==tAttr.m_tLocator.m_iBitOffset
1608 					&& tLoc.m_iBitCount==tAttr.m_tLocator.m_iBitCount )
1609 				{
1610 					dAvgs.Add ( dAggregates.Last() );
1611 					break;
1612 				}
1613 			}
1614 			break;
1615 
1616 		case SPH_AGGR_MIN:
1617 			switch ( tAttr.m_eAttrType )
1618 			{
1619 			case SPH_ATTR_INTEGER:	dAggregates.Add ( new AggrMin_t<DWORD> ( tAttr.m_tLocator ) ); break;
1620 			case SPH_ATTR_BIGINT:	dAggregates.Add ( new AggrMin_t<int64_t> ( tAttr.m_tLocator ) ); break;
1621 			case SPH_ATTR_FLOAT:	dAggregates.Add ( new AggrMin_t<float> ( tAttr.m_tLocator ) ); break;
1622 			default:				assert ( 0 && "internal error: unhandled aggregate type" ); break;
1623 			}
1624 			break;
1625 
1626 		case SPH_AGGR_MAX:
1627 			switch ( tAttr.m_eAttrType )
1628 			{
1629 			case SPH_ATTR_INTEGER:	dAggregates.Add ( new AggrMax_t<DWORD> ( tAttr.m_tLocator ) ); break;
1630 			case SPH_ATTR_BIGINT:	dAggregates.Add ( new AggrMax_t<int64_t> ( tAttr.m_tLocator ) ); break;
1631 			case SPH_ATTR_FLOAT:	dAggregates.Add ( new AggrMax_t<float> ( tAttr.m_tLocator ) ); break;
1632 			default:				assert ( 0 && "internal error: unhandled aggregate type" ); break;
1633 			}
1634 			break;
1635 
1636 		case SPH_AGGR_CAT:
1637 			dAggregates.Add ( new AggrConcat_t ( tAttr ) );
1638 			break;
1639 
1640 		default:
1641 			assert ( 0 && "internal error: unhandled aggregate function" );
1642 			break;
1643 		}
1644 
1645 		if ( tAttr.m_eAggrFunc==SPH_AGGR_CAT )
1646 			tCloner.m_dAttrsPtr.Add ( tAttr.m_tLocator );
1647 		else
1648 			tCloner.m_dAttrsRaw.Add ( tAttr.m_tLocator );
1649 	}
1650 }
1651 
1652 
1653 /// match sorter with k-buffering and group-by
1654 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
1655 class CSphKBufferGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
1656 {
1657 protected:
1658 	ESphGroupBy		m_eGroupBy;			///< group-by function
1659 	CSphGrouper *	m_pGrouper;
1660 
1661 	CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn >	m_hGroup2Match;
1662 
1663 protected:
1664 	int				m_iLimit;		///< max matches to be retrieved
1665 
1666 	CSphUniqounter	m_tUniq;
1667 	bool			m_bSortByDistinct;
1668 
1669 	GroupSorter_fn<COMPGROUP>	m_tGroupSorter;
1670 	const ISphMatchComparator *	m_pComp;
1671 
1672 	CSphVector<IAggrFunc *>		m_dAggregates;
1673 	CSphVector<IAggrFunc *>		m_dAvgs;
1674 	const ISphFilter *			m_pAggrFilter; ///< aggregate filter for matches on flatten
1675 	MatchCloner_t				m_tPregroup;
1676 
1677 	static const int			GROUPBY_FACTOR = 4;	///< allocate this times more storage when doing group-by (k, as in k-buffer)
1678 
1679 public:
1680 	/// ctor
CSphKBufferGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)1681 	CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
1682 		: CSphMatchQueueTraits ( pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
1683 		, CSphGroupSorterSettings ( tSettings )
1684 		, m_eGroupBy ( pQuery->m_eGroupFunc )
1685 		, m_pGrouper ( tSettings.m_pGrouper )
1686 		, m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR )
1687 		, m_iLimit ( pQuery->m_iMaxMatches )
1688 		, m_bSortByDistinct ( false )
1689 		, m_pComp ( pComp )
1690 		, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
1691 	{
1692 		assert ( GROUPBY_FACTOR>1 );
1693 		assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
1694 
1695 		if_const ( NOTIFICATIONS )
1696 			m_dJustPopped.Reserve ( m_iSize );
1697 	}
1698 
1699 	/// schema setup
SetSchema(CSphRsetSchema & tSchema)1700 	virtual void SetSchema ( CSphRsetSchema & tSchema )
1701 	{
1702 		m_tSchema = tSchema;
1703 		m_tPregroup.SetSchema ( &m_tSchema );
1704 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
1705 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
1706 		if_const ( DISTINCT )
1707 		{
1708 			m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
1709 		}
1710 		ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
1711 	}
1712 
1713 	/// dtor
~CSphKBufferGroupSorter()1714 	~CSphKBufferGroupSorter ()
1715 	{
1716 		SafeDelete ( m_pComp );
1717 		SafeDelete ( m_pGrouper );
1718 		SafeDelete ( m_pAggrFilter );
1719 		ARRAY_FOREACH ( i, m_dAggregates )
1720 			SafeDelete ( m_dAggregates[i] );
1721 	}
1722 
1723 	/// check if this sorter does groupby
IsGroupby() const1724 	virtual bool IsGroupby () const
1725 	{
1726 		return true;
1727 	}
1728 
CanMulti() const1729 	virtual bool CanMulti () const
1730 	{
1731 		if ( m_pGrouper && !m_pGrouper->CanMulti() )
1732 			return false;
1733 
1734 		if ( HasString ( &m_tState ) )
1735 			return false;
1736 
1737 		if ( HasString ( &m_tGroupSorter ) )
1738 			return false;
1739 
1740 		return true;
1741 	}
1742 
1743 	/// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE * pStrings)1744 	void SetStringPool ( const BYTE * pStrings )
1745 	{
1746 		m_pGrouper->SetStringPool ( pStrings );
1747 	}
1748 
1749 	/// add entry to the queue
Push(const CSphMatch & tEntry)1750 	virtual bool Push ( const CSphMatch & tEntry )
1751 	{
1752 		SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
1753 		return PushEx ( tEntry, uGroupKey, false, false );
1754 	}
1755 
1756 	/// add grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool)1757 	virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
1758 	{
1759 		return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, false );
1760 	}
1761 
1762 	/// add entry to the queue
PushEx(const CSphMatch & tEntry,const SphGroupKey_t uGroupKey,bool bGrouped,bool,SphAttr_t * pAttr=NULL)1763 	virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool, SphAttr_t * pAttr=NULL )
1764 	{
1765 		if_const ( NOTIFICATIONS )
1766 		{
1767 			m_iJustPushed = 0;
1768 			m_dJustPopped.Resize ( 0 );
1769 		}
1770 
1771 		// if this group is already hashed, we only need to update the corresponding match
1772 		CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
1773 		if ( ppMatch )
1774 		{
1775 			CSphMatch * pMatch = (*ppMatch);
1776 			assert ( pMatch );
1777 			assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
1778 			assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
1779 
1780 			if ( bGrouped )
1781 			{
1782 				// it's already grouped match
1783 				// sum grouped matches count
1784 				pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
1785 			} else
1786 			{
1787 				// it's a simple match
1788 				// increase grouped matches count
1789 				pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
1790 			}
1791 
1792 			// update aggregates
1793 			ARRAY_FOREACH ( i, m_dAggregates )
1794 				m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
1795 
1796 			// if new entry is more relevant, update from it
1797 			if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) )
1798 			{
1799 				if_const ( NOTIFICATIONS )
1800 				{
1801 					m_iJustPushed = tEntry.m_uDocID;
1802 					m_dJustPopped.Add ( pMatch->m_uDocID );
1803 				}
1804 
1805 				// clone the low part of the match
1806 				m_tPregroup.Clone ( pMatch, &tEntry );
1807 
1808 				// update @groupbystr value, if available
1809 				if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
1810 					pMatch->SetAttr ( m_tLocGroupbyStr, *pAttr );
1811 			}
1812 		}
1813 
1814 		// submit actual distinct value in all cases
1815 		if_const ( DISTINCT )
1816 		{
1817 			int iCount = 1;
1818 			if ( bGrouped )
1819 				iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
1820 			m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
1821 		}
1822 
1823 		// it's a dupe anyway, so we shouldn't update total matches count
1824 		if ( ppMatch )
1825 			return false;
1826 
1827 		// if we're full, let's cut off some worst groups
1828 		if ( m_iUsed==m_iSize )
1829 			CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
1830 
1831 		// do add
1832 		assert ( m_iUsed<m_iSize );
1833 		CSphMatch & tNew = m_pData [ m_iUsed++ ];
1834 		m_tSchema.CloneMatch ( &tNew, tEntry );
1835 
1836 		if_const ( NOTIFICATIONS )
1837 			m_iJustPushed = tNew.m_uDocID;
1838 
1839 		if ( !bGrouped )
1840 		{
1841 			tNew.SetAttr ( m_tLocGroupby, uGroupKey );
1842 			tNew.SetAttr ( m_tLocCount, 1 );
1843 			if_const ( DISTINCT )
1844 				tNew.SetAttr ( m_tLocDistinct, 0 );
1845 
1846 			// set @groupbystr value if available
1847 			if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
1848 				tNew.SetAttr ( m_tLocGroupbyStr, *pAttr );
1849 		} else
1850 		{
1851 			ARRAY_FOREACH ( i, m_dAggregates )
1852 				m_dAggregates[i]->Ungroup ( &tNew );
1853 		}
1854 
1855 		m_hGroup2Match.Add ( &tNew, uGroupKey );
1856 		m_iTotal++;
1857 		return true;
1858 	}
1859 
CalcAvg(bool bGroup)1860 	void CalcAvg ( bool bGroup )
1861 	{
1862 		if ( !m_dAvgs.GetLength() )
1863 			return;
1864 
1865 		CSphMatch * pMatch = m_pData;
1866 		CSphMatch * pEnd = pMatch + m_iUsed;
1867 		while ( pMatch<pEnd )
1868 		{
1869 			ARRAY_FOREACH ( j, m_dAvgs )
1870 			{
1871 				if ( bGroup )
1872 					m_dAvgs[j]->Finalize ( pMatch );
1873 				else
1874 					m_dAvgs[j]->Ungroup ( pMatch );
1875 			}
1876 			++pMatch;
1877 		}
1878 	}
1879 
1880 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)1881 	int Flatten ( CSphMatch * pTo, int iTag )
1882 	{
1883 		CountDistinct ();
1884 
1885 		CalcAvg ( true );
1886 		SortGroups ();
1887 
1888 		CSphVector<IAggrFunc *> dAggrs;
1889 		if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
1890 		{
1891 			dAggrs = m_dAggregates;
1892 			ARRAY_FOREACH ( i, m_dAvgs )
1893 				dAggrs.RemoveValue ( m_dAvgs[i] );
1894 		}
1895 
1896 		// FIXME!!! we should provide up-to max_matches to output buffer
1897 		const CSphMatch * pBegin = pTo;
1898 		int iLen = GetLength ();
1899 		for ( int i=0; i<iLen; ++i )
1900 		{
1901 			CSphMatch & tMatch = m_pData[i];
1902 			ARRAY_FOREACH ( j, dAggrs )
1903 				dAggrs[j]->Finalize ( &tMatch );
1904 
1905 			// HAVING filtering
1906 			if ( m_pAggrFilter && !m_pAggrFilter->Eval ( tMatch ) )
1907 				continue;
1908 
1909 			m_tSchema.CloneMatch ( pTo, tMatch );
1910 			if ( iTag>=0 )
1911 				pTo->m_iTag = iTag;
1912 
1913 			pTo++;
1914 		}
1915 
1916 		m_iUsed = 0;
1917 		m_iTotal = 0;
1918 
1919 		m_hGroup2Match.Reset ();
1920 		if_const ( DISTINCT )
1921 			m_tUniq.Resize ( 0 );
1922 
1923 		return ( pTo-pBegin );
1924 	}
1925 
1926 	/// get entries count
GetLength() const1927 	int GetLength () const
1928 	{
1929 		return Min ( m_iUsed, m_iLimit );
1930 	}
1931 
1932 	/// set group comparator state
SetGroupState(const CSphMatchComparatorState & tState)1933 	void SetGroupState ( const CSphMatchComparatorState & tState )
1934 	{
1935 		m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
1936 
1937 		// FIXME! manual bitwise copying.. yuck
1938 		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
1939 		{
1940 			m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
1941 			m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
1942 		}
1943 		m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
1944 		m_tGroupSorter.m_iNow = tState.m_iNow;
1945 
1946 		// check whether we sort by distinct
1947 		if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
1948 			for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
1949 				if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
1950 			{
1951 				m_bSortByDistinct = true;
1952 				break;
1953 			}
1954 	}
1955 
1956 protected:
1957 	/// count distinct values if necessary
CountDistinct()1958 	void CountDistinct ()
1959 	{
1960 		if_const ( DISTINCT )
1961 		{
1962 			m_tUniq.Sort ();
1963 			SphGroupKey_t uGroup;
1964 			for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
1965 			{
1966 				CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
1967 				if ( ppMatch )
1968 					(*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
1969 			}
1970 		}
1971 	}
1972 
1973 	/// cut worst N groups off the buffer tail
CutWorst(int iBound)1974 	void CutWorst ( int iBound )
1975 	{
1976 		// sort groups
1977 		if ( m_bSortByDistinct )
1978 			CountDistinct ();
1979 
1980 		CalcAvg ( true );
1981 		SortGroups ();
1982 		CalcAvg ( false );
1983 
1984 		if_const ( NOTIFICATIONS )
1985 		{
1986 			for ( int i = iBound; i < m_iUsed; ++i )
1987 				m_dJustPopped.Add ( m_pData[i].m_uDocID );
1988 		}
1989 
1990 		// cleanup unused distinct stuff
1991 		if_const ( DISTINCT )
1992 		{
1993 			// build kill-list
1994 			CSphVector<SphGroupKey_t> dRemove;
1995 			dRemove.Resize ( m_iUsed-iBound );
1996 			ARRAY_FOREACH ( i, dRemove )
1997 				dRemove[i] = m_pData[iBound+i].GetAttr ( m_tLocGroupby );
1998 
1999 			// sort and compact
2000 			if ( !m_bSortByDistinct )
2001 				m_tUniq.Sort ();
2002 			m_tUniq.Compact ( &dRemove[0], m_iUsed-iBound );
2003 		}
2004 
2005 		// rehash
2006 		m_hGroup2Match.Reset ();
2007 		for ( int i=0; i<iBound; i++ )
2008 			m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tLocGroupby ) );
2009 
2010 		// cut groups
2011 		m_iUsed = iBound;
2012 	}
2013 
2014 	/// sort groups buffer
SortGroups()2015 	void SortGroups ()
2016 	{
2017 		sphSort ( m_pData, m_iUsed, m_tGroupSorter, m_tGroupSorter );
2018 	}
2019 
Finalize(ISphMatchProcessor & tProcessor,bool)2020 	virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
2021 	{
2022 		if ( !GetLength() )
2023 			return;
2024 
2025 		if ( m_iUsed>m_iLimit )
2026 			CutWorst ( m_iLimit );
2027 
2028 		// just evaluate in heap order
2029 		CSphMatch * pCur = m_pData;
2030 		const CSphMatch * pEnd = m_pData + m_iUsed;
2031 		while ( pCur<pEnd )
2032 			tProcessor.Process ( pCur++ );
2033 	}
2034 };
2035 
2036 
2037 /// match sorter with k-buffering and N-best group-by
2038 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2039 class CSphKBufferNGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
2040 {
2041 protected:
2042 	ESphGroupBy		m_eGroupBy;			///< group-by function
2043 	CSphGrouper *	m_pGrouper;
2044 
2045 	CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn >	m_hGroup2Match;
2046 
2047 protected:
2048 	int				m_iLimit;		///< max matches to be retrieved
2049 	int				m_iGLimit;		///< limit per one group
2050 	CSphFixedVector<int>	m_dGroupByList;	///< chains of equal matches from groups
2051 	CSphFixedVector<int>	m_dGroupsLen;	///< lengths of chains of equal matches from groups
2052 	int				m_iFreeHeads;		///< insertion point for head matches.
2053 	CSphFreeList	m_dFreeTails;		///< where to put tails of the subgroups
2054 	SphGroupKey_t	m_uLastGroupKey;	///< helps to determine in pushEx whether the new subgroup started
2055 #ifndef NDEBUG
2056 	int				m_iruns;		///< helpers for conditional breakpoints on debug
2057 	int				m_ipushed;
2058 #endif
2059 	CSphUniqounter	m_tUniq;
2060 	bool			m_bSortByDistinct;
2061 
2062 	GroupSorter_fn<COMPGROUP>	m_tGroupSorter;
2063 	const ISphMatchComparator *	m_pComp;
2064 
2065 	CSphVector<IAggrFunc *>		m_dAggregates;
2066 	CSphVector<IAggrFunc *>		m_dAvgs;
2067 	const ISphFilter *			m_pAggrFilter; ///< aggregate filter for matches on flatten
2068 	MatchCloner_t				m_tPregroup;
2069 
2070 	static const int			GROUPBY_FACTOR = 4;	///< allocate this times more storage when doing group-by (k, as in k-buffer)
2071 
2072 protected:
GetFreePos(bool bTailPos)2073 	inline int GetFreePos ( bool bTailPos )
2074 	{
2075 		// if we're full, let's cut off some worst groups
2076 		if ( m_iUsed==m_iSize )
2077 		{
2078 			CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
2079 			// don't return true value for tail this case,
2080 			// since the context might be changed by the CutWorst!
2081 			if ( bTailPos )
2082 				return -1;
2083 		}
2084 
2085 		// do add
2086 		assert ( m_iUsed<m_iSize );
2087 		++m_iUsed;
2088 		if ( bTailPos )
2089 		{
2090 			int iRes = m_dFreeTails.Get() + m_iSize;
2091 			assert ( iRes<CSphMatchQueueTraits::GetDataLength() );
2092 			return iRes;
2093 		} else
2094 		{
2095 			assert ( m_iFreeHeads<m_iSize );
2096 			return m_iFreeHeads++;
2097 		}
2098 	}
2099 
2100 public:
2101 	/// ctor
CSphKBufferNGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2102 	CSphKBufferNGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
2103 		: CSphMatchQueueTraits ( ( pQuery->m_iGroupbyLimit>1 ? 2 : 1 ) * pQuery->m_iMaxMatches * GROUPBY_FACTOR, true )
2104 		, CSphGroupSorterSettings ( tSettings )
2105 		, m_eGroupBy ( pQuery->m_eGroupFunc )
2106 		, m_pGrouper ( tSettings.m_pGrouper )
2107 		, m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR*2 )
2108 		, m_iLimit ( pQuery->m_iMaxMatches )
2109 		, m_iGLimit ( pQuery->m_iGroupbyLimit )
2110 		, m_dGroupByList ( 0 )
2111 		, m_dGroupsLen ( 0 )
2112 		, m_iFreeHeads ( 0 )
2113 		, m_uLastGroupKey ( -1 )
2114 		, m_bSortByDistinct ( false )
2115 		, m_pComp ( pComp )
2116 		, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
2117 	{
2118 		assert ( GROUPBY_FACTOR>1 );
2119 		assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
2120 		assert ( m_iGLimit > 1 );
2121 
2122 		if_const ( NOTIFICATIONS )
2123 			m_dJustPopped.Reserve ( m_iSize );
2124 
2125 		// trick! This case we allocated 2*m_iSize mem.
2126 		// range 0..m_iSize used for 1-st matches of each subgroup (i.e., for heads)
2127 		// range m_iSize+1..2*m_iSize used for the tails.
2128 		m_dGroupByList.Reset ( m_iSize );
2129 		m_dGroupsLen.Reset ( m_iSize );
2130 		m_iSize >>= 1;
2131 		ARRAY_FOREACH ( i, m_dGroupByList )
2132 		{
2133 			m_dGroupByList[i] = -1;
2134 			m_dGroupsLen[i] = 0;
2135 		}
2136 		m_dFreeTails.Reset ( m_iSize );
2137 
2138 #ifndef NDEBUG
2139 		m_iruns = 0;
2140 		m_ipushed = 0;
2141 #endif
2142 	}
2143 
2144 	// only for n-group
GetDataLength() const2145 	virtual int			GetDataLength () const
2146 	{
2147 		return CSphMatchQueueTraits::GetDataLength()/2;
2148 	}
2149 
2150 	/// schema setup
SetSchema(CSphRsetSchema & tSchema)2151 	virtual void SetSchema ( CSphRsetSchema & tSchema )
2152 	{
2153 		m_tSchema = tSchema;
2154 
2155 		m_tPregroup.SetSchema ( &m_tSchema );
2156 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
2157 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
2158 		if_const ( DISTINCT )
2159 		{
2160 			m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
2161 		}
2162 		ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
2163 	}
2164 
2165 	/// dtor
~CSphKBufferNGroupSorter()2166 	virtual ~CSphKBufferNGroupSorter ()
2167 	{
2168 		SafeDelete ( m_pComp );
2169 		SafeDelete ( m_pGrouper );
2170 		SafeDelete ( m_pAggrFilter );
2171 		ARRAY_FOREACH ( i, m_dAggregates )
2172 			SafeDelete ( m_dAggregates[i] );
2173 	}
2174 
2175 	/// check if this sorter does groupby
IsGroupby() const2176 	virtual bool IsGroupby () const
2177 	{
2178 		return true;
2179 	}
2180 
CanMulti() const2181 	virtual bool CanMulti () const
2182 	{
2183 		if ( m_pGrouper && !m_pGrouper->CanMulti() )
2184 			return false;
2185 
2186 		if ( HasString ( &m_tState ) )
2187 			return false;
2188 
2189 		if ( HasString ( &m_tGroupSorter ) )
2190 			return false;
2191 
2192 		return true;
2193 	}
2194 
2195 	/// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE * pStrings)2196 	void SetStringPool ( const BYTE * pStrings )
2197 	{
2198 		m_pGrouper->SetStringPool ( pStrings );
2199 	}
2200 
2201 	/// add entry to the queue
Push(const CSphMatch & tEntry)2202 	virtual bool Push ( const CSphMatch & tEntry )
2203 	{
2204 		SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
2205 		return PushEx ( tEntry, uGroupKey, false, false );
2206 	}
2207 
2208 	/// add grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2209 	virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2210 	{
2211 		return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, bNewSet );
2212 	}
2213 
FreeMatchChain(int iPos)2214 	void FreeMatchChain ( int iPos )
2215 	{
2216 		while ( iPos>=0 )
2217 		{
2218 			if_const ( NOTIFICATIONS )
2219 				m_dJustPopped.Add ( m_pData[iPos].m_uDocID );
2220 
2221 			int iLastPos = iPos;
2222 			m_tSchema.FreeStringPtrs ( m_pData + iPos );
2223 			iPos = m_dGroupByList[iPos];
2224 			m_dGroupByList[iLastPos] = -1;
2225 			if ( iLastPos>=m_iSize )
2226 				m_dFreeTails.Free ( iLastPos - m_iSize );
2227 		}
2228 	}
2229 
2230 	// insert a match into subgroup, or discard it
2231 	// returns 0 if no place now (need to flush)
2232 	// returns 1 if value was discarded or replaced other existing value
2233 	// returns 2 if value was added.
2234 	enum InsertRes_e
2235 	{
2236 		INSERT_NONE,
2237 		INSERT_REPLACED,
2238 		INSERT_ADDED
2239 	};
2240 
InsertMatch(int iPos,const CSphMatch & tEntry)2241 	InsertRes_e InsertMatch ( int iPos, const CSphMatch & tEntry )
2242 	{
2243 		const int iHead = iPos;
2244 		int iPrev = -1;
2245 		const bool bDoAdd = ( m_dGroupsLen[iHead]<m_iGLimit );
2246 		while ( iPos>=0 )
2247 		{
2248 			CSphMatch * pMatch = m_pData+iPos;
2249 			if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) ) // the tEntry is better than current *pMatch
2250 			{
2251 				int iNew = iPos;
2252 				if ( bDoAdd )
2253 				{
2254 					iNew = GetFreePos ( true ); // add to the tails (2-nd subrange)
2255 					if ( iNew<0 )
2256 						return INSERT_NONE;
2257 				} else
2258 				{
2259 					int iPreLast = iPrev;
2260 					while ( m_dGroupByList[iNew]>=0 )
2261 					{
2262 						iPreLast = iNew;
2263 						iNew = m_dGroupByList[iNew];
2264 					}
2265 
2266 					m_tSchema.FreeStringPtrs ( m_pData + iNew );
2267 					m_dGroupByList[iPreLast] = -1;
2268 
2269 					if ( iPos==iNew ) // avoid cycle link to itself
2270 						iPos = -1;
2271 				}
2272 
2273 				CSphMatch & tNew = m_pData [ iNew ];
2274 
2275 				if_const ( NOTIFICATIONS )
2276 				{
2277 					m_iJustPushed = tEntry.m_uDocID;
2278 					if ( tNew.m_uDocID!=0 )
2279 						m_dJustPopped.Add ( tNew.m_uDocID );
2280 				}
2281 				if ( bDoAdd )
2282 					++m_dGroupsLen[iHead];
2283 
2284 				if ( iPos==iHead ) // this is the first elem, need to copy groupby staff from it
2285 				{
2286 					// trick point! The first elem MUST live in the low half of the pool.
2287 					// So, replacing the first is actually moving existing one to the last half,
2288 					// then overwriting the one in the low half with the new value and link them
2289 					m_tPregroup.Clone ( &tNew, pMatch );
2290 					m_tPregroup.Clone ( pMatch, &tEntry );
2291 					m_dGroupByList[iNew] = m_dGroupByList[iPos];
2292 					m_dGroupByList[iPos] = iNew;
2293 				} else // this is elem somewhere in the chain, just shift it.
2294 				{
2295 					m_tPregroup.Clone ( &tNew, &tEntry );
2296 					m_dGroupByList[iPrev] = iNew;
2297 					m_dGroupByList[iNew] = iPos;
2298 				}
2299 				break;
2300 			}
2301 			iPrev = iPos;
2302 			iPos = m_dGroupByList[iPos];
2303 		}
2304 
2305 		if ( iPos<0 && bDoAdd ) // this item is less than everything, but still appropriate
2306 		{
2307 			int iNew = GetFreePos ( true ); // add to the tails (2-nd subrange)
2308 			if ( iNew<0 )
2309 				return INSERT_NONE;
2310 
2311 			CSphMatch & tNew = m_pData [ iNew ];
2312 			m_tPregroup.Clone ( &tNew, &tEntry );
2313 			m_dGroupByList[iPrev] = iNew;
2314 			m_dGroupByList[iNew] = iPos;
2315 
2316 			if_const ( NOTIFICATIONS )
2317 				m_iJustPushed = tEntry.m_uDocID;
2318 			if ( bDoAdd )
2319 				++m_dGroupsLen[iHead];
2320 		}
2321 
2322 		return bDoAdd ? INSERT_ADDED : INSERT_REPLACED;
2323 	}
2324 
2325 #ifndef NDEBUG
CheckIntegrity()2326 	void CheckIntegrity()
2327 	{
2328 		int iTotalLen = 0;
2329 		for ( int i=0; i<m_iFreeHeads; ++i )
2330 		{
2331 			int iLen = 0;
2332 			int iCur = i;
2333 			while ( iCur>=0 )
2334 			{
2335 				int iNext = m_dGroupByList[iCur];
2336 				assert ( iNext==-1 || m_pData[iCur].GetAttr ( m_tLocGroupby )==0 || m_pData[iNext].GetAttr ( m_tLocGroupby )==0
2337 						|| m_pData[iCur].GetAttr ( m_tLocGroupby )==m_pData[iNext].GetAttr ( m_tLocGroupby ) );
2338 				++iLen;
2339 				iCur = iNext;
2340 			}
2341 			assert ( m_dGroupsLen[i]==iLen );
2342 			iTotalLen += iLen;
2343 		}
2344 		assert ( iTotalLen==m_iUsed );
2345 	}
2346 #define CHECKINTEGRITY() CheckIntegrity()
2347 #else
2348 #define CHECKINTEGRITY()
2349 #endif
2350 
2351 	/// add entry to the queue
PushEx(const CSphMatch & tEntry,const SphGroupKey_t uGroupKey,bool bGrouped,bool bNewSet)2352 	virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool bNewSet )
2353 	{
2354 #ifndef NDEBUG
2355 		++m_ipushed;
2356 #endif
2357 		CHECKINTEGRITY();
2358 		if_const ( NOTIFICATIONS )
2359 		{
2360 			m_iJustPushed = 0;
2361 			m_dJustPopped.Resize ( 0 );
2362 		}
2363 
2364 		// if this group is already hashed, we only need to update the corresponding match
2365 		CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
2366 		if ( ppMatch )
2367 		{
2368 			CSphMatch * pMatch = (*ppMatch);
2369 			assert ( pMatch );
2370 			assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
2371 			assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
2372 
2373 			if ( bGrouped )
2374 			{
2375 				// it's already grouped match
2376 				// sum grouped matches count
2377 				if ( bNewSet || uGroupKey!=m_uLastGroupKey )
2378 				{
2379 					pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
2380 					m_uLastGroupKey = uGroupKey;
2381 					bNewSet = true;
2382 				}
2383 			} else
2384 			{
2385 				// it's a simple match
2386 				// increase grouped matches count
2387 				pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
2388 			}
2389 
2390 			bNewSet |= !bGrouped;
2391 
2392 			// update aggregates
2393 			if ( bNewSet )
2394 				ARRAY_FOREACH ( i, m_dAggregates )
2395 					m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
2396 
2397 			// if new entry is more relevant, update from it
2398 			InsertRes_e eRes = InsertMatch ( pMatch-m_pData, tEntry );
2399 			if ( eRes==INSERT_NONE )
2400 			{
2401 				// need to keep all poped values
2402 				CSphTightVector<SphDocID_t> dJustPopped;
2403 				dJustPopped.SwapData ( m_dJustPopped );
2404 
2405 				// was no insertion because cache cleaning. Recall myself
2406 				PushEx ( tEntry, uGroupKey, bGrouped, bNewSet );
2407 
2408 				// post Push work
2409 				ARRAY_FOREACH ( i, dJustPopped )
2410 					m_dJustPopped.Add ( dJustPopped[i] );
2411 				CSphMatch ** ppDec = m_hGroup2Match ( uGroupKey );
2412 				assert ( ppDec );
2413 				(*ppDec)->SetAttr ( m_tLocCount, (*ppDec)->GetAttr ( m_tLocCount )-1 );
2414 
2415 			} else if ( eRes==INSERT_ADDED )
2416 			{
2417 				if ( bGrouped )
2418 					return true;
2419 				++m_iTotal;
2420 			}
2421 		}
2422 
2423 		// submit actual distinct value in all cases
2424 		if_const ( DISTINCT )
2425 		{
2426 			int iCount = 1;
2427 			if ( bGrouped )
2428 				iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
2429 			m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
2430 		}
2431 		CHECKINTEGRITY();
2432 		// it's a dupe anyway, so we shouldn't update total matches count
2433 		if ( ppMatch )
2434 			return false;
2435 
2436 		// do add
2437 		int iNew = GetFreePos ( false );
2438 		assert ( iNew>=0 && iNew<m_iSize );
2439 
2440 		CSphMatch & tNew = m_pData [ iNew ];
2441 		m_tSchema.CloneMatch ( &tNew, tEntry );
2442 
2443 		m_dGroupByList [ iNew ] = -1;
2444 		m_dGroupsLen [ iNew ] = 1;
2445 
2446 		if_const ( NOTIFICATIONS )
2447 			m_iJustPushed = tNew.m_uDocID;
2448 
2449 		if ( !bGrouped )
2450 		{
2451 			tNew.SetAttr ( m_tLocGroupby, uGroupKey );
2452 			tNew.SetAttr ( m_tLocCount, 1 );
2453 			if_const ( DISTINCT )
2454 				tNew.SetAttr ( m_tLocDistinct, 0 );
2455 		} else
2456 		{
2457 			m_uLastGroupKey = uGroupKey;
2458 			ARRAY_FOREACH ( i, m_dAggregates )
2459 				m_dAggregates[i]->Ungroup ( &tNew );
2460 		}
2461 
2462 		m_hGroup2Match.Add ( &tNew, uGroupKey );
2463 		m_iTotal++;
2464 		CHECKINTEGRITY();
2465 		return true;
2466 	}
2467 
CalcAvg(bool bGroup)2468 	void CalcAvg ( bool bGroup )
2469 	{
2470 		if ( !m_dAvgs.GetLength() )
2471 			return;
2472 
2473 		int iHeadMatch;
2474 		int iMatch = iHeadMatch = 0;
2475 		for ( int i=0; i<m_iUsed; ++i )
2476 		{
2477 			CSphMatch * pMatch = m_pData + iMatch;
2478 			ARRAY_FOREACH ( j, m_dAvgs )
2479 			{
2480 				if ( bGroup )
2481 					m_dAvgs[j]->Finalize ( pMatch );
2482 				else
2483 					m_dAvgs[j]->Ungroup ( pMatch );
2484 			}
2485 			iMatch = m_dGroupByList [ iMatch ];
2486 			if ( iMatch<0 )
2487 				iMatch = ++iHeadMatch;
2488 		}
2489 	}
2490 
2491 	// rebuild m_hGroup2Match to point to the subgroups (2-nd elem and further)
2492 	// returns true if any such subgroup found
Hash2nd()2493 	void Hash2nd()
2494 	{
2495 		// let the hash points to the chains from 2-nd elem
2496 		m_hGroup2Match.Reset();
2497 		int iHeads = m_iUsed;
2498 		for ( int i=0; i<iHeads; i++ )
2499 		{
2500 			if ( m_dGroupsLen[i]>1 )
2501 			{
2502 				int iCount = m_dGroupsLen[i];
2503 				int iTail = m_dGroupByList[i];
2504 
2505 				assert ( iTail>=0 );
2506 				assert ( m_pData[iTail].GetAttr ( m_tLocGroupby )==0 ||
2507 						m_pData[i].GetAttr ( m_tLocGroupby )==m_pData[iTail].GetAttr ( m_tLocGroupby ) );
2508 
2509 				m_hGroup2Match.Add ( m_pData + iTail, m_pData[i].GetAttr ( m_tLocGroupby ) );
2510 				iHeads -= iCount - 1;
2511 				m_dGroupsLen[iTail] = iCount;
2512 			}
2513 		}
2514 	}
2515 
2516 
2517 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)2518 	int Flatten ( CSphMatch * pTo, int iTag )
2519 	{
2520 		CountDistinct ();
2521 		CalcAvg ( true );
2522 
2523 		Hash2nd();
2524 
2525 		SortGroups ();
2526 		CSphVector<IAggrFunc *> dAggrs;
2527 		if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
2528 		{
2529 			dAggrs = m_dAggregates;
2530 			ARRAY_FOREACH ( i, m_dAvgs )
2531 				dAggrs.RemoveValue ( m_dAvgs[i] );
2532 		}
2533 
2534 		const CSphMatch * pBegin = pTo;
2535 		int iTotal = GetLength ();
2536 		int iTopGroupMatch = 0;
2537 		int iEntry = 0;
2538 
2539 		while ( iEntry<iTotal )
2540 		{
2541 			CSphMatch * pMatch = m_pData + iTopGroupMatch;
2542 			ARRAY_FOREACH ( j, dAggrs )
2543 				dAggrs[j]->Finalize ( pMatch );
2544 
2545 			bool bTopPassed = ( !m_pAggrFilter || m_pAggrFilter->Eval ( *pMatch ) );
2546 
2547 			// copy top group match
2548 			if ( bTopPassed )
2549 			{
2550 				m_tSchema.CloneMatch ( pTo, *pMatch );
2551 				if ( iTag>=0 )
2552 					pTo->m_iTag = iTag;
2553 				pTo++;
2554 			}
2555 			iEntry++;
2556 			iTopGroupMatch++;
2557 
2558 			// now look for the next match.
2559 			// In this specific case (2-nd, just after the head)
2560 			// we have to look it in the hash, not in the linked list!
2561 			CSphMatch ** ppMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2562 			int iNext = ( ppMatch ? *ppMatch-m_pData : -1 );
2563 			while ( iNext>=0 )
2564 			{
2565 				// copy rest group matches
2566 				if ( bTopPassed )
2567 				{
2568 					m_tPregroup.Combine ( pTo, m_pData+iNext, pMatch );
2569 					if ( iTag>=0 )
2570 						pTo->m_iTag = iTag;
2571 					pTo++;
2572 				}
2573 				iEntry++;
2574 				iNext = m_dGroupByList[iNext];
2575 			}
2576 		}
2577 
2578 		m_iFreeHeads = m_iUsed = 0;
2579 		m_iTotal = 0;
2580 		ARRAY_FOREACH ( i, m_dGroupByList )
2581 		{
2582 			m_dGroupByList[i] = -1;
2583 			m_dGroupsLen[i] = 0;
2584 		}
2585 
2586 		m_hGroup2Match.Reset ();
2587 		if_const ( DISTINCT )
2588 			m_tUniq.Resize ( 0 );
2589 
2590 		return ( pTo-pBegin );
2591 	}
2592 
2593 	/// get entries count
GetLength() const2594 	int GetLength () const
2595 	{
2596 		return Min ( m_iUsed, m_iLimit );
2597 	}
2598 
2599 	/// set group comparator state
SetGroupState(const CSphMatchComparatorState & tState)2600 	void SetGroupState ( const CSphMatchComparatorState & tState )
2601 	{
2602 		m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
2603 
2604 		// FIXME! manual bitwise copying.. yuck
2605 		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
2606 		{
2607 			m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
2608 			m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
2609 		}
2610 		m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
2611 		m_tGroupSorter.m_iNow = tState.m_iNow;
2612 
2613 		// check whether we sort by distinct
2614 		if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
2615 			for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
2616 				if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
2617 				{
2618 					m_bSortByDistinct = true;
2619 					break;
2620 				}
2621 	}
2622 
2623 protected:
2624 	/// count distinct values if necessary
CountDistinct()2625 	void CountDistinct ()
2626 	{
2627 		if_const ( DISTINCT )
2628 		{
2629 			m_tUniq.Sort ();
2630 			SphGroupKey_t uGroup;
2631 			for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
2632 			{
2633 				CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
2634 				if ( ppMatch )
2635 					(*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
2636 			}
2637 		}
2638 	}
2639 
2640 	/// cut worst N groups off the buffer tail
CutWorst(int iBound)2641 	void CutWorst ( int iBound )
2642 	{
2643 #ifndef NDEBUG
2644 		++m_iruns;
2645 #endif
2646 		CHECKINTEGRITY ();
2647 
2648 		// sort groups
2649 		if ( m_bSortByDistinct )
2650 			CountDistinct ();
2651 
2652 		Hash2nd();
2653 
2654 		CalcAvg ( true );
2655 		SortGroups ();
2656 		CalcAvg ( false );
2657 
2658 		int iHead = 0;
2659 		for ( int iLimit=0; iLimit<iBound; iHead++ )
2660 		{
2661 			const CSphMatch * pMatch = m_pData + iHead;
2662 			CSphMatch ** ppTailMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2663 			int iCount = 1;
2664 			int iTail = -1;
2665 			if ( ppTailMatch )
2666 			{
2667 				assert ( (*ppTailMatch)->GetAttr ( m_tLocGroupby )==0 ||
2668 						pMatch->GetAttr ( m_tLocGroupby )==(*ppTailMatch)->GetAttr ( m_tLocGroupby ) );
2669 				iTail = (*ppTailMatch) - m_pData;
2670 				iCount = m_dGroupsLen[iTail];
2671 				assert ( iCount>0 );
2672 			}
2673 
2674 			// whole chain fits limit
2675 			if ( ( iLimit + iCount )<=iBound )
2676 			{
2677 				m_dGroupByList[iHead] = iTail;
2678 				m_dGroupsLen[iHead] = iCount;
2679 				iLimit += iCount;
2680 				continue;
2681 			}
2682 
2683 			// only head match fits limit but not tail match(es)
2684 			if ( ( iLimit + 1 )==iBound )
2685 			{
2686 				m_dGroupByList[iHead] = -1;
2687 				m_dGroupsLen[iHead] = 1;
2688 
2689 				FreeMatchChain ( iTail );
2690 				iHead++;
2691 				break;
2692 			}
2693 
2694 			// part of tail fits limit - it our last chain
2695 			// fix-up chain to fits limit
2696 			assert ( iBound-iLimit<=iCount );
2697 			iCount = iBound - iLimit;
2698 			m_dGroupByList[iHead] = iTail;
2699 			m_dGroupsLen[iHead] = iCount;
2700 
2701 			iCount--;
2702 			int iPrev = iTail;
2703 			while ( iCount>0 )
2704 			{
2705 				iPrev = iTail;
2706 				iTail = m_dGroupByList[iTail];
2707 				iCount--;
2708 				assert ( !iCount || iTail>=0 );
2709 			}
2710 			m_dGroupByList[iPrev] = -1;
2711 
2712 			iHead++;
2713 			FreeMatchChain ( iTail );
2714 			break;
2715 		}
2716 
2717 		if_const ( DISTINCT )
2718 		{
2719 			int iCount = m_iUsed - iHead;
2720 			CSphFixedVector<SphGroupKey_t> dRemove ( iCount );
2721 			for ( int i=0; i<iCount; i++ )
2722 				dRemove[i] = m_pData [ i + iHead ].GetAttr ( m_tLocGroupby );
2723 
2724 			if ( !m_bSortByDistinct )
2725 				m_tUniq.Sort ();
2726 
2727 			m_tUniq.Compact ( &dRemove[0], iCount );
2728 		}
2729 
2730 		// cleanup chains after limit
2731 		for ( int i=iHead; i<m_iFreeHeads; ++i )
2732 		{
2733 			CSphMatch * pMatch = m_pData + i;
2734 			CSphMatch ** ppTailMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2735 			if ( ppTailMatch )
2736 			{
2737 				assert ( ( *ppTailMatch )->GetAttr ( m_tLocGroupby )==0 || pMatch->GetAttr ( m_tLocGroupby )==( *ppTailMatch )->GetAttr ( m_tLocGroupby ) );
2738 				int iTail = ( *ppTailMatch ) - m_pData;
2739 				assert ( m_dGroupsLen[iTail]>0 );
2740 				FreeMatchChain ( iTail );
2741 			}
2742 
2743 			if_const ( NOTIFICATIONS )
2744 				m_dJustPopped.Add ( pMatch->m_uDocID );
2745 			m_tSchema.FreeStringPtrs ( pMatch );
2746 			m_dGroupByList[i] = -1;
2747 			m_dGroupsLen[i] = 0;
2748 		}
2749 
2750 		// cleanup chain lengths after hash2nd
2751 		for ( int i=m_iSize; i<m_dGroupsLen.GetLength(); i++ )
2752 			m_dGroupsLen[i] = 0;
2753 
2754 		// rehash
2755 		m_hGroup2Match.Reset ();
2756 		for ( int i=0; i<iHead; i++ )
2757 			m_hGroup2Match.Add ( m_pData + i, m_pData[i].GetAttr ( m_tLocGroupby ) );
2758 
2759 		// cut groups
2760 		m_iUsed = iBound;
2761 		m_iFreeHeads = iHead;
2762 		CHECKINTEGRITY();
2763 	}
2764 
2765 	/// sort groups buffer
SortGroups()2766 	void SortGroups ()
2767 	{
2768 		sphSort ( m_pData, m_iFreeHeads, m_tGroupSorter, m_tGroupSorter );
2769 	}
2770 
Finalize(ISphMatchProcessor & tProcessor,bool)2771 	virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
2772 	{
2773 		if ( !GetLength() )
2774 			return;
2775 
2776 		if ( m_iUsed>m_iLimit )
2777 			CutWorst ( m_iLimit );
2778 
2779 		for ( int iMatch=0; iMatch<m_iFreeHeads; iMatch++ )
2780 		{
2781 			// this is head match
2782 			CSphMatch * pMatch = m_pData + iMatch;
2783 			tProcessor.Process ( pMatch );
2784 
2785 			int iNext = m_dGroupByList[iMatch];
2786 			int iCount = m_dGroupsLen[iMatch] - 1;
2787 			while ( iCount>0 )
2788 			{
2789 				tProcessor.Process ( m_pData + iNext );
2790 				iNext = m_dGroupByList[iNext];
2791 				iCount--;
2792 			}
2793 		}
2794 	}
2795 };
2796 
2797 
2798 /// match sorter with k-buffering and group-by for MVAs
2799 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2800 class CSphKBufferMVAGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
2801 {
2802 protected:
2803 	const DWORD *		m_pMva;		///< pointer to MVA pool for incoming matches
2804 	bool				m_bArenaProhibit;
2805 	CSphAttrLocator		m_tMvaLocator;
2806 	bool				m_bMva64;
2807 
2808 public:
2809 	/// ctor
CSphKBufferMVAGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2810 	CSphKBufferMVAGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
2811 		: CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
2812 		, m_pMva ( NULL )
2813 		, m_bArenaProhibit ( false )
2814 		, m_bMva64 ( tSettings.m_bMva64 )
2815 	{
2816 		this->m_pGrouper->GetLocator ( m_tMvaLocator );
2817 	}
2818 
2819 	/// check if this sorter does groupby
IsGroupby() const2820 	virtual bool IsGroupby () const
2821 	{
2822 		return true;
2823 	}
2824 
2825 	/// set MVA pool for subsequent matches
SetMVAPool(const DWORD * pMva,bool bArenaProhibit)2826 	void SetMVAPool ( const DWORD * pMva, bool bArenaProhibit )
2827 	{
2828 		m_pMva = pMva;
2829 		m_bArenaProhibit = bArenaProhibit;
2830 	}
2831 
2832 	/// add entry to the queue
Push(const CSphMatch & tEntry)2833 	virtual bool Push ( const CSphMatch & tEntry )
2834 	{
2835 		assert ( m_pMva );
2836 		if ( !m_pMva )
2837 			return false;
2838 
2839 		// get that list
2840 		// FIXME! OPTIMIZE! use simpler locator than full bits/count here
2841 		// FIXME! hardcoded MVA type, so here's MVA_DOWNSIZE marker for searching
2842 		const DWORD * pValues = tEntry.GetAttrMVA ( this->m_tMvaLocator, m_pMva, m_bArenaProhibit ); // (this pointer is for gcc; it doesn't work otherwise)
2843 		if ( !pValues )
2844 			return false;
2845 
2846 		DWORD iValues = *pValues++;
2847 
2848 		bool bRes = false;
2849 		if ( m_bMva64 )
2850 		{
2851 			assert ( ( iValues%2 )==0 );
2852 			for ( ;iValues>0; iValues-=2, pValues+=2 )
2853 			{
2854 				int64_t iMva = MVA_UPSIZE ( pValues );
2855 				SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( iMva );
2856 				bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
2857 			}
2858 
2859 		} else
2860 		{
2861 			while ( iValues-- )
2862 			{
2863 				SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( *pValues++ );
2864 				bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
2865 			}
2866 		}
2867 		return bRes;
2868 	}
2869 
2870 	/// add pre-grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2871 	virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2872 	{
2873 		// re-group it based on the group key
2874 		// (first 'this' is for icc; second 'this' is for gcc)
2875 		return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
2876 	}
2877 };
2878 
2879 
2880 /// match sorter with k-buffering and group-by for JSON arrays
2881 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2882 class CSphKBufferJsonGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
2883 {
2884 public:
2885 	/// ctor
CSphKBufferJsonGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2886 	CSphKBufferJsonGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
2887 		: CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
2888 	{}
2889 
2890 	/// check if this sorter does groupby
IsGroupby() const2891 	virtual bool IsGroupby () const
2892 	{
2893 		return true;
2894 	}
2895 
2896 	/// add entry to the queue
Push(const CSphMatch & tMatch)2897 	virtual bool Push ( const CSphMatch & tMatch )
2898 	{
2899 		bool bRes = false;
2900 
2901 		int iLen;
2902 		char sBuf[32];
2903 
2904 		SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromMatch ( tMatch );
2905 
2906 		int64_t iValue = (int64_t)uGroupkey;
2907 		const BYTE * pStrings = ((CSphGrouperJsonField*)this->m_pGrouper)->m_pStrings;
2908 		const BYTE * pValue = pStrings + ( iValue & 0xffffffff );
2909 		ESphJsonType eRes = (ESphJsonType)( iValue >> 32 );
2910 
2911 		switch ( eRes )
2912 		{
2913 		case JSON_ROOT:
2914 			{
2915 				iLen = sphJsonNodeSize ( JSON_ROOT, pValue );
2916 				bool bEmpty = iLen==5; // mask and JSON_EOF
2917 				uGroupkey = bEmpty ? 0 : sphFNV64 ( pValue, iLen );
2918 				return this->PushEx ( tMatch, uGroupkey, false, false, bEmpty ? NULL : &iValue );
2919 			}
2920 		case JSON_STRING:
2921 		case JSON_OBJECT:
2922 		case JSON_MIXED_VECTOR:
2923 			iLen = sphJsonUnpackInt ( &pValue );
2924 			uGroupkey = ( iLen==1 && eRes!=JSON_STRING ) ? 0 : sphFNV64 ( pValue, iLen );
2925 			return this->PushEx ( tMatch, uGroupkey, false, false, ( iLen==1 && eRes!=JSON_STRING ) ? 0: &iValue );
2926 		case JSON_STRING_VECTOR:
2927 			{
2928 				sphJsonUnpackInt ( &pValue );
2929 				iLen = sphJsonUnpackInt ( &pValue );
2930 				for ( int i=0;i<iLen;i++ )
2931 				{
2932 					DWORD uOff = pValue-pStrings;
2933 					int64_t iValue = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_STRING )<<32 ) );
2934 					int iStrLen = sphJsonUnpackInt ( &pValue );
2935 					uGroupkey = sphFNV64 ( pValue, iStrLen );
2936 					bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
2937 					pValue += iStrLen;
2938 				}
2939 				return bRes;
2940 			}
2941 		case JSON_INT32:
2942 			uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphGetDword(pValue) ) );
2943 			break;
2944 		case JSON_INT64:
2945 			uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphJsonLoadBigint ( &pValue ) ) );
2946 			break;
2947 		case JSON_DOUBLE:
2948 			snprintf ( sBuf, sizeof(sBuf), "%f", sphQW2D ( sphJsonLoadBigint ( &pValue ) ) );
2949 			uGroupkey = sphFNV64 ( (const BYTE*)sBuf );
2950 			break;
2951 		case JSON_INT32_VECTOR:
2952 			{
2953 				iLen = sphJsonUnpackInt ( &pValue );
2954 				int * p = (int*)pValue;
2955 				DWORD uOff = pValue-pStrings;
2956 				for ( int i=0;i<iLen;i++ )
2957 				{
2958 					int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_INT32 )<<32 ) );
2959 					uGroupkey = *p++;
2960 					bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
2961 					uOff+=4;
2962 				}
2963 				return bRes;
2964 			}
2965 			break;
2966 		case JSON_INT64_VECTOR:
2967 		case JSON_DOUBLE_VECTOR:
2968 			{
2969 				iLen = sphJsonUnpackInt ( &pValue );
2970 				int64_t * p = (int64_t*)pValue;
2971 				DWORD uOff = pValue-pStrings;
2972 				ESphJsonType eType = eRes==JSON_INT64_VECTOR ? JSON_INT64 : JSON_DOUBLE;
2973 				for ( int i=0;i<iLen;i++ )
2974 				{
2975 					int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)eType )<<32 ) );
2976 					uGroupkey = *p++;
2977 					bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
2978 					uOff+=8;
2979 				}
2980 				return bRes;
2981 			}
2982 			break;
2983 		default:
2984 			uGroupkey = 0;
2985 			iValue = 0;
2986 			break;
2987 		}
2988 
2989 		bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
2990 		return bRes;
2991 	}
2992 
2993 	/// add pre-grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2994 	virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2995 	{
2996 		// re-group it based on the group key
2997 		// (first 'this' is for icc; second 'this' is for gcc)
2998 		return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
2999 	}
3000 };
3001 
3002 
3003 /// implicit group-by sorter
3004 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
3005 class CSphImplicitGroupSorter : public ISphMatchSorter, ISphNoncopyable, protected CSphGroupSorterSettings
3006 {
3007 protected:
3008 	CSphMatch		m_tData;
3009 	bool			m_bDataInitialized;
3010 
3011 	CSphVector<SphUngroupedValue_t>	m_dUniq;
3012 
3013 	CSphVector<IAggrFunc *>		m_dAggregates;
3014 	const ISphFilter *			m_pAggrFilter;				///< aggregate filter for matches on flatten
3015 	MatchCloner_t				m_tPregroup;
3016 
3017 public:
3018 	/// ctor
CSphImplicitGroupSorter(const ISphMatchComparator * DEBUGARG (pComp),const CSphQuery *,const CSphGroupSorterSettings & tSettings)3019 	CSphImplicitGroupSorter ( const ISphMatchComparator * DEBUGARG(pComp), const CSphQuery *, const CSphGroupSorterSettings & tSettings )
3020 		: CSphGroupSorterSettings ( tSettings )
3021 		, m_bDataInitialized ( false )
3022 		, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
3023 	{
3024 		assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
3025 		assert ( !pComp );
3026 
3027 		if_const ( NOTIFICATIONS )
3028 			m_dJustPopped.Reserve(1);
3029 
3030 		m_dUniq.Reserve ( 16384 );
3031 		m_iMatchCapacity = 1;
3032 	}
3033 
3034 	/// dtor
~CSphImplicitGroupSorter()3035 	~CSphImplicitGroupSorter ()
3036 	{
3037 		SafeDelete ( m_pAggrFilter );
3038 		ARRAY_FOREACH ( i, m_dAggregates )
3039 			SafeDelete ( m_dAggregates[i] );
3040 	}
3041 
3042 	/// schema setup
SetSchema(CSphRsetSchema & tSchema)3043 	virtual void SetSchema ( CSphRsetSchema & tSchema )
3044 	{
3045 		m_tSchema = tSchema;
3046 		m_tPregroup.SetSchema ( &m_tSchema );
3047 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
3048 		m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
3049 		if_const ( DISTINCT )
3050 		{
3051 			m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
3052 		}
3053 
3054 		CSphVector<IAggrFunc *> dTmp;
3055 		ESphSortKeyPart dTmpKeypart[CSphMatchComparatorState::MAX_ATTRS];
3056 		CSphAttrLocator dTmpLocator[CSphMatchComparatorState::MAX_ATTRS];
3057 		ExtractAggregates ( m_tSchema, m_tLocCount, dTmpKeypart, dTmpLocator, m_dAggregates, dTmp, m_tPregroup );
3058 		assert ( !dTmp.GetLength() );
3059 	}
3060 
GetDataLength() const3061 	int GetDataLength () const
3062 	{
3063 		return 1;
3064 	}
3065 
UsesAttrs() const3066 	bool UsesAttrs () const
3067 	{
3068 		return true;
3069 	}
3070 
3071 	/// check if this sorter does groupby
IsGroupby() const3072 	virtual bool IsGroupby () const
3073 	{
3074 		return true;
3075 	}
3076 
CanMulti() const3077 	virtual bool CanMulti () const
3078 	{
3079 		return true;
3080 	}
3081 
3082 	/// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE *)3083 	void SetStringPool ( const BYTE * )
3084 	{
3085 	}
3086 
3087 	/// add entry to the queue
Push(const CSphMatch & tEntry)3088 	virtual bool Push ( const CSphMatch & tEntry )
3089 	{
3090 		return PushEx ( tEntry, false );
3091 	}
3092 
3093 	/// add grouped entry to the queue. bNewSet indicates the beginning of resultset returned by an agent.
PushGrouped(const CSphMatch & tEntry,bool)3094 	virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
3095 	{
3096 		return PushEx ( tEntry, true );
3097 	}
3098 
3099 	/// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)3100 	virtual int Flatten ( CSphMatch * pTo, int iTag )
3101 	{
3102 		assert ( m_bDataInitialized );
3103 
3104 		CountDistinct ();
3105 
3106 		ARRAY_FOREACH ( j, m_dAggregates )
3107 			m_dAggregates[j]->Finalize ( &m_tData );
3108 
3109 		int iCopied = 0;
3110 		if ( !m_pAggrFilter || m_pAggrFilter->Eval ( m_tData ) )
3111 		{
3112 			iCopied = 1;
3113 			m_tSchema.CloneMatch ( pTo, m_tData );
3114 			m_tSchema.FreeStringPtrs ( &m_tData );
3115 			if ( iTag>=0 )
3116 				pTo->m_iTag = iTag;
3117 		}
3118 
3119 		m_iTotal = 0;
3120 		m_bDataInitialized = false;
3121 
3122 		if_const ( DISTINCT )
3123 			m_dUniq.Resize(0);
3124 
3125 		return iCopied;
3126 	}
3127 
3128 	/// finalize, perform final sort/cut as needed
Finalize(ISphMatchProcessor & tProcessor,bool)3129 	void Finalize ( ISphMatchProcessor & tProcessor, bool )
3130 	{
3131 		if ( !GetLength() )
3132 			return;
3133 
3134 		tProcessor.Process ( &m_tData );
3135 	}
3136 
3137 	/// get entries count
GetLength() const3138 	int GetLength () const
3139 	{
3140 		return m_bDataInitialized ? 1 : 0;
3141 	}
3142 
3143 protected:
3144 	/// add entry to the queue
PushEx(const CSphMatch & tEntry,bool bGrouped)3145 	bool PushEx ( const CSphMatch & tEntry, bool bGrouped )
3146 	{
3147 		if_const ( NOTIFICATIONS )
3148 		{
3149 			m_iJustPushed = 0;
3150 			m_dJustPopped.Resize(0);
3151 		}
3152 
3153 		if ( m_bDataInitialized )
3154 		{
3155 			assert ( m_tData.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
3156 
3157 			if ( bGrouped )
3158 			{
3159 				// it's already grouped match
3160 				// sum grouped matches count
3161 				m_tData.SetAttr ( m_tLocCount, m_tData.GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
3162 			} else
3163 			{
3164 				// it's a simple match
3165 				// increase grouped matches count
3166 				m_tData.SetAttr ( m_tLocCount, 1 + m_tData.GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
3167 			}
3168 
3169 			// update aggregates
3170 			ARRAY_FOREACH ( i, m_dAggregates )
3171 				m_dAggregates[i]->Update ( &m_tData, &tEntry, bGrouped );
3172 
3173 			// if new entry is more relevant, update from it
3174 			if ( tEntry.m_uDocID<m_tData.m_uDocID )
3175 			{
3176 				if_const ( NOTIFICATIONS )
3177 				{
3178 					m_iJustPushed = tEntry.m_uDocID;
3179 					m_dJustPopped.Add ( m_tData.m_uDocID );
3180 				}
3181 
3182 				m_tPregroup.Clone ( &m_tData, &tEntry );
3183 			}
3184 		}
3185 
3186 		// submit actual distinct value in all cases
3187 		if_const ( DISTINCT )
3188 		{
3189 			int iCount = 1;
3190 			if ( bGrouped )
3191 				iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
3192 			m_dUniq.Add ( SphUngroupedValue_t ( tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
3193 		}
3194 
3195 		// it's a dupe anyway, so we shouldn't update total matches count
3196 		if ( m_bDataInitialized )
3197 			return false;
3198 
3199 		// add first
3200 		m_tSchema.CloneMatch ( &m_tData, tEntry );
3201 
3202 		if_const ( NOTIFICATIONS )
3203 			m_iJustPushed = m_tData.m_uDocID;
3204 
3205 		if ( !bGrouped )
3206 		{
3207 			m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
3208 			m_tData.SetAttr ( m_tLocCount, 1 );
3209 			if_const ( DISTINCT )
3210 				m_tData.SetAttr ( m_tLocDistinct, 0 );
3211 		} else
3212 		{
3213 			ARRAY_FOREACH ( i, m_dAggregates )
3214 				m_dAggregates[i]->Ungroup ( &m_tData );
3215 		}
3216 
3217 		m_bDataInitialized = true;
3218 		m_iTotal++;
3219 		return true;
3220 	}
3221 
3222 	/// count distinct values if necessary
CountDistinct()3223 	void CountDistinct ()
3224 	{
3225 		if_const ( DISTINCT )
3226 		{
3227 			assert ( m_bDataInitialized );
3228 
3229 			m_dUniq.Sort ();
3230 			int iCount = 0;
3231 			ARRAY_FOREACH ( i, m_dUniq )
3232 			{
3233 				if ( i>0 && m_dUniq[i-1]==m_dUniq[i] )
3234 					continue;
3235 				iCount += m_dUniq[i].m_iCount;
3236 			}
3237 
3238 			m_tData.SetAttr ( m_tLocDistinct, iCount );
3239 		}
3240 	}
3241 };
3242 
3243 //////////////////////////////////////////////////////////////////////////
3244 // PLAIN SORTING FUNCTORS
3245 //////////////////////////////////////////////////////////////////////////
3246 
3247 /// match sorter
3248 struct MatchRelevanceLt_fn : public ISphMatchComparator
3249 {
VirtualIsLessMatchRelevanceLt_fn3250 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3251 	{
3252 		return IsLess ( a, b, t );
3253 	}
3254 
IsLessMatchRelevanceLt_fn3255 	static bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & )
3256 	{
3257 		if ( a.m_iWeight!=b.m_iWeight )
3258 			return a.m_iWeight < b.m_iWeight;
3259 
3260 		return a.m_uDocID > b.m_uDocID;
3261 	}
3262 };
3263 
3264 
3265 /// match sorter
3266 struct MatchAttrLt_fn : public ISphMatchComparator
3267 {
VirtualIsLessMatchAttrLt_fn3268 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3269 	{
3270 		return IsLess ( a, b, t );
3271 	}
3272 
IsLessMatchAttrLt_fn3273 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3274 	{
3275 		if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
3276 		{
3277 			SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3278 			SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3279 			if ( aa!=bb )
3280 				return aa<bb;
3281 		} else
3282 		{
3283 			int iCmp = t.CmpStrings ( a, b, 0 );
3284 			if ( iCmp!=0 )
3285 				return iCmp<0;
3286 		}
3287 
3288 		if ( a.m_iWeight!=b.m_iWeight )
3289 			return a.m_iWeight < b.m_iWeight;
3290 
3291 		return a.m_uDocID > b.m_uDocID;
3292 	}
3293 };
3294 
3295 
3296 /// match sorter
3297 struct MatchAttrGt_fn : public ISphMatchComparator
3298 {
VirtualIsLessMatchAttrGt_fn3299 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3300 	{
3301 		return IsLess ( a, b, t );
3302 	}
3303 
IsLessMatchAttrGt_fn3304 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3305 	{
3306 		if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
3307 		{
3308 			SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3309 			SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3310 			if ( aa!=bb )
3311 				return aa>bb;
3312 		} else
3313 		{
3314 			int iCmp = t.CmpStrings ( a, b, 0 );
3315 			if ( iCmp!=0 )
3316 				return iCmp>0;
3317 		}
3318 
3319 		if ( a.m_iWeight!=b.m_iWeight )
3320 			return a.m_iWeight < b.m_iWeight;
3321 
3322 		return a.m_uDocID > b.m_uDocID;
3323 	}
3324 };
3325 
3326 
3327 /// match sorter
3328 struct MatchTimeSegments_fn : public ISphMatchComparator
3329 {
VirtualIsLessMatchTimeSegments_fn3330 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3331 	{
3332 		return IsLess ( a, b, t );
3333 	}
3334 
IsLessMatchTimeSegments_fn3335 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3336 	{
3337 		SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3338 		SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3339 		int iA = GetSegment ( aa, t.m_iNow );
3340 		int iB = GetSegment ( bb, t.m_iNow );
3341 		if ( iA!=iB )
3342 			return iA > iB;
3343 
3344 		if ( a.m_iWeight!=b.m_iWeight )
3345 			return a.m_iWeight < b.m_iWeight;
3346 
3347 		if ( aa!=bb )
3348 			return aa<bb;
3349 
3350 		return a.m_uDocID > b.m_uDocID;
3351 	}
3352 
3353 protected:
GetSegmentMatchTimeSegments_fn3354 	static inline int GetSegment ( SphAttr_t iStamp, SphAttr_t iNow )
3355 	{
3356 		if ( iStamp>=iNow-3600 ) return 0; // last hour
3357 		if ( iStamp>=iNow-24*3600 ) return 1; // last day
3358 		if ( iStamp>=iNow-7*24*3600 ) return 2; // last week
3359 		if ( iStamp>=iNow-30*24*3600 ) return 3; // last month
3360 		if ( iStamp>=iNow-90*24*3600 ) return 4; // last 3 months
3361 		return 5; // everything else
3362 	}
3363 };
3364 
3365 
3366 /// match sorter
3367 struct MatchExpr_fn : public ISphMatchComparator
3368 {
VirtualIsLessMatchExpr_fn3369 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3370 	{
3371 		return IsLess ( a, b, t );
3372 	}
3373 
IsLessMatchExpr_fn3374 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3375 	{
3376 		float aa = a.GetAttrFloat ( t.m_tLocator[0] ); // FIXME! OPTIMIZE!!! simplified (dword-granular) getter could be used here
3377 		float bb = b.GetAttrFloat ( t.m_tLocator[0] );
3378 		if ( aa!=bb )
3379 			return aa<bb;
3380 		return a.m_uDocID>b.m_uDocID;
3381 	}
3382 };
3383 
3384 /////////////////////////////////////////////////////////////////////////////
3385 
3386 #define SPH_TEST_PAIR(_aa,_bb,_idx ) \
3387 	if ( (_aa)!=(_bb) ) \
3388 		return ( (t.m_uAttrDesc >> (_idx)) & 1 ) ^ ( (_aa) > (_bb) );
3389 
3390 
3391 #define SPH_TEST_KEYPART(_idx) \
3392 	switch ( t.m_eKeypart[_idx] ) \
3393 	{ \
3394 		case SPH_KEYPART_ID:		SPH_TEST_PAIR ( a.m_uDocID, b.m_uDocID, _idx ); break; \
3395 		case SPH_KEYPART_WEIGHT:	SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
3396 		case SPH_KEYPART_INT: \
3397 		{ \
3398 			SphAttr_t aa = a.GetAttr ( t.m_tLocator[_idx] ); \
3399 			SphAttr_t bb = b.GetAttr ( t.m_tLocator[_idx] ); \
3400 			SPH_TEST_PAIR ( aa, bb, _idx ); \
3401 			break; \
3402 		} \
3403 		case SPH_KEYPART_FLOAT: \
3404 		{ \
3405 			float aa = a.GetAttrFloat ( t.m_tLocator[_idx] ); \
3406 			float bb = b.GetAttrFloat ( t.m_tLocator[_idx] ); \
3407 			SPH_TEST_PAIR ( aa, bb, _idx ) \
3408 			break; \
3409 		} \
3410 		case SPH_KEYPART_STRINGPTR: \
3411 		case SPH_KEYPART_STRING: \
3412 		{ \
3413 			int iCmp = t.CmpStrings ( a, b, _idx ); \
3414 			if ( iCmp!=0 ) \
3415 				return ( ( t.m_uAttrDesc >> (_idx) ) & 1 ) ^ ( iCmp>0 ); \
3416 			break; \
3417 		} \
3418 	}
3419 
3420 
3421 struct MatchGeneric2_fn : public ISphMatchComparator
3422 {
VirtualIsLessMatchGeneric2_fn3423 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3424 	{
3425 		return IsLess ( a, b, t );
3426 	}
3427 
IsLessMatchGeneric2_fn3428 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3429 	{
3430 		SPH_TEST_KEYPART(0);
3431 		SPH_TEST_KEYPART(1);
3432 		return a.m_uDocID>b.m_uDocID;
3433 	}
3434 };
3435 
3436 
3437 struct MatchGeneric3_fn : public ISphMatchComparator
3438 {
VirtualIsLessMatchGeneric3_fn3439 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3440 	{
3441 		return IsLess ( a, b, t );
3442 	}
3443 
IsLessMatchGeneric3_fn3444 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3445 	{
3446 		SPH_TEST_KEYPART(0);
3447 		SPH_TEST_KEYPART(1);
3448 		SPH_TEST_KEYPART(2);
3449 		return a.m_uDocID>b.m_uDocID;
3450 	}
3451 };
3452 
3453 
3454 struct MatchGeneric4_fn : public ISphMatchComparator
3455 {
VirtualIsLessMatchGeneric4_fn3456 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3457 	{
3458 		return IsLess ( a, b, t );
3459 	}
3460 
IsLessMatchGeneric4_fn3461 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3462 	{
3463 		SPH_TEST_KEYPART(0);
3464 		SPH_TEST_KEYPART(1);
3465 		SPH_TEST_KEYPART(2);
3466 		SPH_TEST_KEYPART(3);
3467 		return a.m_uDocID>b.m_uDocID;
3468 	}
3469 };
3470 
3471 
3472 struct MatchGeneric5_fn : public ISphMatchComparator
3473 {
VirtualIsLessMatchGeneric5_fn3474 	virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3475 	{
3476 		return IsLess ( a, b, t );
3477 	}
3478 
IsLessMatchGeneric5_fn3479 	static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3480 	{
3481 		SPH_TEST_KEYPART(0);
3482 		SPH_TEST_KEYPART(1);
3483 		SPH_TEST_KEYPART(2);
3484 		SPH_TEST_KEYPART(3);
3485 		SPH_TEST_KEYPART(4);
3486 		return a.m_uDocID>b.m_uDocID;
3487 	}
3488 };
3489 
3490 //////////////////////////////////////////////////////////////////////////
3491 // SORT CLAUSE PARSER
3492 //////////////////////////////////////////////////////////////////////////
3493 
3494 static const int MAX_SORT_FIELDS = 5; // MUST be in sync with CSphMatchComparatorState::m_iAttr
3495 
3496 
3497 class SortClauseTokenizer_t
3498 {
3499 protected:
3500 	const char * m_pCur;
3501 	const char * m_pMax;
3502 	char * m_pBuf;
3503 
3504 protected:
ToLower(char c)3505 	char ToLower ( char c )
3506 	{
3507 		// 0..9, A..Z->a..z, _, a..z, @, .
3508 		if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
3509 			return c;
3510 		if ( c>='A' && c<='Z' )
3511 			return c-'A'+'a';
3512 		return 0;
3513 	}
3514 
3515 public:
SortClauseTokenizer_t(const char * sBuffer)3516 	explicit SortClauseTokenizer_t ( const char * sBuffer )
3517 	{
3518 		int iLen = strlen(sBuffer);
3519 		m_pBuf = new char [ iLen+1 ];
3520 		m_pMax = m_pBuf+iLen;
3521 		m_pCur = m_pBuf;
3522 
3523 		// make string lowercase but keep case of JSON.field
3524 		bool bJson = false;
3525 		for ( int i=0; i<=iLen; i++ )
3526 		{
3527 			char cSrc = sBuffer[i];
3528 			char cDst = ToLower ( cSrc );
3529 			bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
3530 			m_pBuf[i] = bJson ? cSrc : cDst;
3531 		}
3532 	}
3533 
~SortClauseTokenizer_t()3534 	~SortClauseTokenizer_t ()
3535 	{
3536 		SafeDeleteArray ( m_pBuf );
3537 	}
3538 
GetToken()3539 	const char * GetToken ()
3540 	{
3541 		// skip spaces
3542 		while ( m_pCur<m_pMax && !*m_pCur )
3543 			m_pCur++;
3544 		if ( m_pCur>=m_pMax )
3545 			return NULL;
3546 
3547 		// memorize token start, and move pointer forward
3548 		const char * sRes = m_pCur;
3549 		while ( *m_pCur )
3550 			m_pCur++;
3551 		return sRes;
3552 	}
3553 
IsSparseCount(const char * sTok)3554 	bool IsSparseCount ( const char * sTok )
3555 	{
3556 		const char * sSeq = "(*)";
3557 		for ( ; sTok<m_pMax && *sSeq; sTok++ )
3558 		{
3559 			bool bGotSeq = ( *sSeq==*sTok );
3560 			if ( bGotSeq )
3561 				sSeq++;
3562 
3563 			// stop checking on any non space char outside sequence or sequence end
3564 			if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
3565 				break;
3566 		}
3567 
3568 		if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
3569 		{
3570 			// advance token iterator after composite count(*) token
3571 			m_pCur = sTok+1;
3572 			return true;
3573 		} else
3574 		{
3575 			return false;
3576 		}
3577 	}
3578 };
3579 
3580 
Attr2Keypart(ESphAttr eType)3581 static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
3582 {
3583 	switch ( eType )
3584 	{
3585 		case SPH_ATTR_FLOAT:	return SPH_KEYPART_FLOAT;
3586 		case SPH_ATTR_STRING:	return SPH_KEYPART_STRING;
3587 		case SPH_ATTR_JSON:
3588 		case SPH_ATTR_JSON_FIELD:
3589 		case SPH_ATTR_STRINGPTR: return SPH_KEYPART_STRINGPTR;
3590 		default:				return SPH_KEYPART_INT;
3591 	}
3592 }
3593 
3594 
3595 static const char g_sIntAttrPrefix[] = "@int_str2ptr_";
3596 
3597 
sphParseSortClause(const CSphQuery * pQuery,const char * sClause,const ISphSchema & tSchema,ESphSortFunc & eFunc,CSphMatchComparatorState & tState,CSphString & sError)3598 ESortClauseParseResult sphParseSortClause ( const CSphQuery * pQuery, const char * sClause, const ISphSchema & tSchema,
3599 	ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphString & sError )
3600 {
3601 	for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
3602 		tState.m_dAttrs[i] = -1;
3603 
3604 	// mini parser
3605 	SortClauseTokenizer_t tTok ( sClause );
3606 
3607 	bool bField = false; // whether i'm expecting field name or sort order
3608 	int iField = 0;
3609 
3610 	for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
3611 	{
3612 		bField = !bField;
3613 
3614 		// special case, sort by random
3615 		if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
3616 			return SORT_CLAUSE_RANDOM;
3617 
3618 		// handle sort order
3619 		if ( !bField )
3620 		{
3621 			// check
3622 			if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
3623 			{
3624 				sError.SetSprintf ( "invalid sorting order '%s'", pTok );
3625 				return SORT_CLAUSE_ERROR;
3626 			}
3627 
3628 			// set
3629 			if ( !strcmp ( pTok, "desc" ) )
3630 				tState.m_uAttrDesc |= ( 1<<iField );
3631 
3632 			iField++;
3633 			continue;
3634 		}
3635 
3636 		// handle attribute name
3637 		if ( iField==MAX_SORT_FIELDS )
3638 		{
3639 			sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", MAX_SORT_FIELDS );
3640 			return SORT_CLAUSE_ERROR;
3641 		}
3642 
3643 		if ( !strcasecmp ( pTok, "@relevance" )
3644 			|| !strcasecmp ( pTok, "@rank" )
3645 			|| !strcasecmp ( pTok, "@weight" )
3646 			|| !strcasecmp ( pTok, "weight()" ) )
3647 		{
3648 			tState.m_eKeypart[iField] = SPH_KEYPART_WEIGHT;
3649 
3650 		} else if ( !strcasecmp ( pTok, "@id" ) || !strcasecmp ( pTok, "id" ) )
3651 		{
3652 			tState.m_eKeypart[iField] = SPH_KEYPART_ID;
3653 
3654 		} else
3655 		{
3656 			ESphAttr eAttrType = SPH_ATTR_NONE;
3657 
3658 			if ( !strcasecmp ( pTok, "@group" ) )
3659 				pTok = "@groupby";
3660 			else if ( !strcasecmp ( pTok, "count(*)" ) )
3661 				pTok = "@count";
3662 			else if ( !strcasecmp ( pTok, "facet()" ) )
3663 				pTok = "@groupby"; // facet() is essentially a @groupby alias
3664 			else if ( strcasecmp ( pTok, "count" )>=0 && tTok.IsSparseCount ( pTok + sizeof ( "count" ) - 1 ) ) // epression count(*) with various spaces
3665 				pTok = "@count";
3666 
3667 
3668 			// try to lookup plain attr in sorter schema
3669 			int iAttr = tSchema.GetAttrIndex ( pTok );
3670 
3671 			// do not order by mva (the result is undefined)
3672 			if ( iAttr>=0 && ( tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_UINT32SET
3673 				|| tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_INT64SET ) )
3674 			{
3675 				sError.SetSprintf ( "order by MVA is undefined" );
3676 				return SORT_CLAUSE_ERROR;
3677 			}
3678 
3679 			// try to lookup aliased count(*) and aliased groupby() in select items
3680 			if ( iAttr<0 )
3681 			{
3682 				ARRAY_FOREACH ( i, pQuery->m_dItems )
3683 				{
3684 					const CSphQueryItem & tItem = pQuery->m_dItems[i];
3685 					if ( !tItem.m_sAlias.cstr() || strcasecmp ( tItem.m_sAlias.cstr(), pTok ) )
3686 						continue;
3687 					if ( tItem.m_sExpr.Begins("@") )
3688 						iAttr = tSchema.GetAttrIndex ( tItem.m_sExpr.cstr() );
3689 					else if ( tItem.m_sExpr=="count(*)" )
3690 						iAttr = tSchema.GetAttrIndex ( "@count" );
3691 					else if ( tItem.m_sExpr=="groupby()" )
3692 					{
3693 						iAttr = tSchema.GetAttrIndex ( "@groupbystr" );
3694 						// try numeric group by
3695 						if ( iAttr<0 )
3696 							iAttr = tSchema.GetAttrIndex ( "@groupby" );
3697 					}
3698 					break; // break in any case; because we did match the alias
3699 				}
3700 			}
3701 
3702 			// try JSON attribute and use JSON attribute instead of JSON field
3703 			if ( iAttr<0 || ( iAttr>=0 && ( tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON_FIELD
3704 				|| tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON ) ) )
3705 			{
3706 				if ( iAttr>=0 )
3707 				{
3708 					// aliased SPH_ATTR_JSON_FIELD, reuse existing expression
3709 					const CSphColumnInfo * pAttr = &tSchema.GetAttr(iAttr);
3710 					if ( pAttr->m_pExpr.Ptr() )
3711 						pAttr->m_pExpr->AddRef(); // SetupSortRemap uses refcounted pointer, but does not AddRef() itself, so help it
3712 					tState.m_tSubExpr[iField] = pAttr->m_pExpr.Ptr();
3713 					tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
3714 
3715 				} else
3716 				{
3717 					CSphString sJsonCol, sJsonKey;
3718 					if ( sphJsonNameSplit ( pTok, &sJsonCol, &sJsonKey ) )
3719 					{
3720 						iAttr = tSchema.GetAttrIndex ( sJsonCol.cstr() );
3721 						if ( iAttr>=0 )
3722 						{
3723 							tState.m_tSubExpr[iField] = sphExprParse ( pTok, tSchema, NULL, NULL, sError, NULL );
3724 							tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
3725 						}
3726 					}
3727 				}
3728 			}
3729 
3730 			// try json conversion functions (integer()/double()/bigint() in the order by clause)
3731 			if ( iAttr<0 )
3732 			{
3733 				ISphExpr * pExpr = sphExprParse ( pTok, tSchema, &eAttrType, NULL, sError, NULL );
3734 				if ( pExpr )
3735 				{
3736 					tState.m_tSubExpr[iField] = pExpr;
3737 					tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen(pTok) );
3738 					tState.m_tSubKeys[iField].m_uMask = 0;
3739 					tState.m_tSubType[iField] = eAttrType;
3740 					iAttr = 0; // will be remapped in SetupSortRemap
3741 				}
3742 			}
3743 
3744 			// try precalculated json fields received from agents (prefixed with @int_*)
3745 			if ( iAttr<0 )
3746 			{
3747 				CSphString sName;
3748 				sName.SetSprintf ( "%s%s", g_sIntAttrPrefix, pTok );
3749 				iAttr = tSchema.GetAttrIndex ( sName.cstr() );
3750 			}
3751 
3752 			// epic fail
3753 			if ( iAttr<0 )
3754 			{
3755 				sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
3756 				return SORT_CLAUSE_ERROR;
3757 			}
3758 
3759 			const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
3760 			tState.m_eKeypart[iField] = Attr2Keypart ( eAttrType!=SPH_ATTR_NONE ? eAttrType : tCol.m_eAttrType );
3761 			tState.m_tLocator[iField] = tCol.m_tLocator;
3762 			tState.m_dAttrs[iField] = iAttr;
3763 		}
3764 	}
3765 
3766 	if ( iField==0 )
3767 	{
3768 		sError.SetSprintf ( "no sort order defined" );
3769 		return SORT_CLAUSE_ERROR;
3770 	}
3771 
3772 	if ( iField==1 )
3773 		tState.m_eKeypart[iField++] = SPH_KEYPART_ID; // add "id ASC"
3774 
3775 	switch ( iField )
3776 	{
3777 		case 2:		eFunc = FUNC_GENERIC2; break;
3778 		case 3:		eFunc = FUNC_GENERIC3; break;
3779 		case 4:		eFunc = FUNC_GENERIC4; break;
3780 		case 5:		eFunc = FUNC_GENERIC5; break;
3781 		default:	sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
3782 	}
3783 	return SORT_CLAUSE_OK;
3784 }
3785 
3786 //////////////////////////////////////////////////////////////////////////
3787 // SORTING+GROUPING INSTANTIATION
3788 //////////////////////////////////////////////////////////////////////////
3789 
3790 template < typename COMPGROUP >
sphCreateSorter3rd(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3791 static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery,
3792 	const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3793 {
3794 	BYTE uSelector = (bHasPackedFactors?1:0)
3795 		+(tSettings.m_bDistinct?2:0)
3796 		+(tSettings.m_bMVA?4:0)
3797 		+(tSettings.m_bImplicit?8:0)
3798 		+((pQuery->m_iGroupbyLimit>1)?16:0)
3799 		+(tSettings.m_bJson?32:0);
3800 	switch ( uSelector )
3801 	{
3802 	case 0:
3803 		return new CSphKBufferGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3804 	case 1:
3805 		return new CSphKBufferGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3806 	case 2:
3807 		return new CSphKBufferGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3808 	case 3:
3809 		return new CSphKBufferGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3810 	case 4:
3811 		return new CSphKBufferMVAGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3812 	case 5:
3813 		return new CSphKBufferMVAGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3814 	case 6:
3815 		return new CSphKBufferMVAGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
3816 	case 7:
3817 		return new CSphKBufferMVAGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
3818 	case 8:
3819 		return new CSphImplicitGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3820 	case 9:
3821 		return new CSphImplicitGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3822 	case 10:
3823 		return new CSphImplicitGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3824 	case 11:
3825 		return new CSphImplicitGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3826 	case 16:
3827 		return new CSphKBufferNGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3828 	case 17:
3829 		return new CSphKBufferNGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3830 	case 18:
3831 		return new CSphKBufferNGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3832 	case 19:
3833 		return new CSphKBufferNGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3834 	case 32:
3835 		return new CSphKBufferJsonGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3836 	case 33:
3837 		return new CSphKBufferJsonGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3838 	case 34:
3839 		return new CSphKBufferJsonGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
3840 	case 35:
3841 		return new CSphKBufferJsonGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
3842 	}
3843 	assert(0);
3844 	return NULL;
3845 }
3846 
3847 
sphCreateSorter2nd(ESphSortFunc eGroupFunc,const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3848 static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp,
3849 	const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3850 {
3851 	switch ( eGroupFunc )
3852 	{
3853 		case FUNC_GENERIC2:		return sphCreateSorter3rd<MatchGeneric2_fn>	( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3854 		case FUNC_GENERIC3:		return sphCreateSorter3rd<MatchGeneric3_fn>	( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3855 		case FUNC_GENERIC4:		return sphCreateSorter3rd<MatchGeneric4_fn>	( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3856 		case FUNC_GENERIC5:		return sphCreateSorter3rd<MatchGeneric5_fn>	( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3857 		case FUNC_EXPR:			return sphCreateSorter3rd<MatchExpr_fn>		( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3858 		default:				return NULL;
3859 	}
3860 }
3861 
3862 
sphCreateSorter1st(ESphSortFunc eMatchFunc,ESphSortFunc eGroupFunc,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3863 static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc,
3864 	const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3865 {
3866 	if ( tSettings.m_bImplicit )
3867 		return sphCreateSorter2nd ( eGroupFunc, NULL, pQuery, tSettings, bHasPackedFactors );
3868 
3869 	ISphMatchComparator * pComp = NULL;
3870 	switch ( eMatchFunc )
3871 	{
3872 		case FUNC_REL_DESC:		pComp = new MatchRelevanceLt_fn(); break;
3873 		case FUNC_ATTR_DESC:	pComp = new MatchAttrLt_fn(); break;
3874 		case FUNC_ATTR_ASC:		pComp = new MatchAttrGt_fn(); break;
3875 		case FUNC_TIMESEGS:		pComp = new MatchTimeSegments_fn(); break;
3876 		case FUNC_GENERIC2:		pComp = new MatchGeneric2_fn(); break;
3877 		case FUNC_GENERIC3:		pComp = new MatchGeneric3_fn(); break;
3878 		case FUNC_GENERIC4:		pComp = new MatchGeneric4_fn(); break;
3879 		case FUNC_GENERIC5:		pComp = new MatchGeneric5_fn(); break;
3880 		case FUNC_EXPR:			pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
3881 	}
3882 
3883 	assert ( pComp );
3884 	return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors );
3885 }
3886 
3887 //////////////////////////////////////////////////////////////////////////
3888 // GEODIST
3889 //////////////////////////////////////////////////////////////////////////
3890 
3891 struct ExprGeodist_t : public ISphExpr
3892 {
3893 public:
ExprGeodist_tExprGeodist_t3894 						ExprGeodist_t () {}
3895 	bool				Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError );
3896 	virtual float		Eval ( const CSphMatch & tMatch ) const;
3897 	virtual void		Command ( ESphExprCommand eCmd, void * pArg );
3898 
3899 protected:
3900 	CSphAttrLocator		m_tGeoLatLoc;
3901 	CSphAttrLocator		m_tGeoLongLoc;
3902 	float				m_fGeoAnchorLat;
3903 	float				m_fGeoAnchorLong;
3904 	int					m_iLat;
3905 	int					m_iLon;
3906 };
3907 
3908 
Setup(const CSphQuery * pQuery,const ISphSchema & tSchema,CSphString & sError)3909 bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError )
3910 {
3911 	if ( !pQuery->m_bGeoAnchor )
3912 	{
3913 		sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
3914 		return false;
3915 	}
3916 
3917 	int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
3918 	if ( iLat<0 )
3919 	{
3920 		sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
3921 		return false;
3922 	}
3923 
3924 	int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
3925 	if ( iLong<0 )
3926 	{
3927 		sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
3928 		return false;
3929 	}
3930 
3931 	m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
3932 	m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
3933 	m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
3934 	m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
3935 	m_iLat = iLat;
3936 	m_iLon = iLong;
3937 	return true;
3938 }
3939 
3940 
sphSqr(double v)3941 static inline double sphSqr ( double v )
3942 {
3943 	return v*v;
3944 }
3945 
3946 
Eval(const CSphMatch & tMatch) const3947 float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
3948 {
3949 	const double R = 6384000;
3950 	float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
3951 	float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
3952 	double dlat = plat - m_fGeoAnchorLat;
3953 	double dlon = plon - m_fGeoAnchorLong;
3954 	double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
3955 	double c = 2*asin ( Min ( 1.0, sqrt(a) ) );
3956 	return (float)(R*c);
3957 }
3958 
Command(ESphExprCommand eCmd,void * pArg)3959 void ExprGeodist_t::Command ( ESphExprCommand eCmd, void * pArg )
3960 {
3961 	if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
3962 	{
3963 		static_cast < CSphVector<int>* >(pArg)->Add ( m_iLat );
3964 		static_cast < CSphVector<int>* >(pArg)->Add ( m_iLon );
3965 	}
3966 }
3967 
3968 //////////////////////////////////////////////////////////////////////////
3969 // PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
3970 //////////////////////////////////////////////////////////////////////////
3971 
3972 static CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation );
3973 static CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
3974 											const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation );
3975 
SetupGroupbySettings(const CSphQuery * pQuery,const ISphSchema & tSchema,CSphGroupSorterSettings & tSettings,CSphVector<int> & dGroupColumns,CSphString & sError,bool bImplicit)3976 static bool SetupGroupbySettings ( const CSphQuery * pQuery, const ISphSchema & tSchema,
3977 								CSphGroupSorterSettings & tSettings, CSphVector<int> & dGroupColumns, CSphString & sError, bool bImplicit )
3978 {
3979 	tSettings.m_tDistinctLoc.m_iBitOffset = -1;
3980 
3981 	if ( pQuery->m_sGroupBy.IsEmpty() && !bImplicit )
3982 		return true;
3983 
3984 	if ( pQuery->m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
3985 	{
3986 		sError.SetSprintf ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );
3987 		return false;
3988 	}
3989 
3990 	CSphString sJsonColumn;
3991 	CSphString sJsonKey;
3992 	if ( pQuery->m_eGroupFunc==SPH_GROUPBY_MULTIPLE )
3993 	{
3994 		CSphVector<CSphAttrLocator> dLocators;
3995 		CSphVector<ESphAttr> dAttrTypes;
3996 		CSphVector<ISphExpr *> dJsonKeys;
3997 
3998 		CSphVector<CSphString> dGroupBy;
3999 		const char * a = pQuery->m_sGroupBy.cstr();
4000 		const char * b = a;
4001 		while ( *a )
4002 		{
4003 			while ( *b && *b!=',' )
4004 				b++;
4005 			CSphString & sNew = dGroupBy.Add();
4006 			sNew.SetBinary ( a, b-a );
4007 			if ( *b==',' )
4008 				b += 2;
4009 			a = b;
4010 		}
4011 		dGroupBy.Uniq();
4012 
4013 		ARRAY_FOREACH ( i, dGroupBy )
4014 		{
4015 			CSphString sJsonExpr;
4016 			dJsonKeys.Add ( NULL );
4017 			if ( sphJsonNameSplit ( dGroupBy[i].cstr(), &sJsonColumn, &sJsonKey ) )
4018 			{
4019 				sJsonExpr = dGroupBy[i];
4020 				dGroupBy[i] = sJsonColumn;
4021 			}
4022 
4023 			const int iAttr = tSchema.GetAttrIndex ( dGroupBy[i].cstr() );
4024 			if ( iAttr<0 )
4025 			{
4026 				sError.SetSprintf ( "group-by attribute '%s' not found", dGroupBy[i].cstr() );
4027 				return false;
4028 			}
4029 
4030 			ESphAttr eType = tSchema.GetAttr ( iAttr ).m_eAttrType;
4031 			if ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET )
4032 			{
4033 				sError.SetSprintf ( "MVA values can't be used in multiple group-by" );
4034 				return false;
4035 			} else if ( sJsonExpr.IsEmpty() && eType==SPH_ATTR_JSON )
4036 			{
4037 				sError.SetSprintf ( "JSON blob can't be used in multiple group-by" );
4038 				return false;
4039 			}
4040 
4041 			dLocators.Add ( tSchema.GetAttr ( iAttr ).m_tLocator );
4042 			dAttrTypes.Add ( eType );
4043 			dGroupColumns.Add ( iAttr );
4044 
4045 			if ( !sJsonExpr.IsEmpty() )
4046 				dJsonKeys.Last() = sphExprParse ( sJsonExpr.cstr(), tSchema, NULL, NULL, sError, NULL );
4047 		}
4048 
4049 		tSettings.m_pGrouper = sphCreateGrouperMulti ( dLocators, dAttrTypes, dJsonKeys, pQuery->m_eCollation );
4050 
4051 	} else if ( sphJsonNameSplit ( pQuery->m_sGroupBy.cstr(), &sJsonColumn, &sJsonKey ) )
4052 	{
4053 		const int iAttr = tSchema.GetAttrIndex ( sJsonColumn.cstr() );
4054 		if ( iAttr<0 )
4055 		{
4056 			sError.SetSprintf ( "groupby: no such attribute '%s'", sJsonColumn.cstr() );
4057 			return false;
4058 		}
4059 
4060 		if ( tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON )
4061 		{
4062 			sError.SetSprintf ( "groupby: attribute '%s' does not have subfields (must be sql_attr_json)", sJsonColumn.cstr() );
4063 			return false;
4064 		}
4065 
4066 		if ( pQuery->m_eGroupFunc!=SPH_GROUPBY_ATTR )
4067 		{
4068 			sError.SetSprintf ( "groupby: legacy groupby modes are not supported on JSON attributes" );
4069 			return false;
4070 		}
4071 
4072 		dGroupColumns.Add ( iAttr );
4073 
4074 		ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL, pQuery->m_eCollation );
4075 		tSettings.m_pGrouper = new CSphGrouperJsonField ( tSchema.GetAttr(iAttr).m_tLocator, pExpr );
4076 		tSettings.m_bJson = true;
4077 
4078 	} else if ( bImplicit )
4079 	{
4080 		tSettings.m_bImplicit = true;
4081 
4082 	} else
4083 	{
4084 		// setup groupby attr
4085 		int iGroupBy = tSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
4086 
4087 		if ( iGroupBy<0 )
4088 		{
4089 			// try aliased groupby attr (facets)
4090 			ARRAY_FOREACH ( i, pQuery->m_dItems )
4091 				if ( pQuery->m_sGroupBy==pQuery->m_dItems[i].m_sExpr )
4092 				{
4093 					iGroupBy = tSchema.GetAttrIndex ( pQuery->m_dItems[i].m_sAlias.cstr() );
4094 					break;
4095 
4096 				} else if ( pQuery->m_sGroupBy==pQuery->m_dItems[i].m_sAlias )
4097 				{
4098 					iGroupBy = tSchema.GetAttrIndex ( pQuery->m_dItems[i].m_sExpr.cstr() );
4099 					break;
4100 				}
4101 		}
4102 
4103 		if ( iGroupBy<0 )
4104 		{
4105 			sError.SetSprintf ( "group-by attribute '%s' not found", pQuery->m_sGroupBy.cstr() );
4106 			return false;
4107 		}
4108 
4109 		ESphAttr eType = tSchema.GetAttr ( iGroupBy ).m_eAttrType;
4110 		CSphAttrLocator tLoc = tSchema.GetAttr ( iGroupBy ).m_tLocator;
4111 		switch ( pQuery->m_eGroupFunc )
4112 		{
4113 			case SPH_GROUPBY_DAY:		tSettings.m_pGrouper = new CSphGrouperDay ( tLoc ); break;
4114 			case SPH_GROUPBY_WEEK:		tSettings.m_pGrouper = new CSphGrouperWeek ( tLoc ); break;
4115 			case SPH_GROUPBY_MONTH:		tSettings.m_pGrouper = new CSphGrouperMonth ( tLoc ); break;
4116 			case SPH_GROUPBY_YEAR:		tSettings.m_pGrouper = new CSphGrouperYear ( tLoc ); break;
4117 			case SPH_GROUPBY_ATTR:
4118 				if ( eType==SPH_ATTR_JSON || eType==SPH_ATTR_JSON_FIELD )
4119 				{
4120 					ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL, pQuery->m_eCollation );
4121 					tSettings.m_pGrouper = new CSphGrouperJsonField ( tLoc, pExpr );
4122 					tSettings.m_bJson = true;
4123 
4124 				} else if ( eType==SPH_ATTR_STRING )
4125 					tSettings.m_pGrouper = sphCreateGrouperString ( tLoc, pQuery->m_eCollation );
4126 				else
4127 					tSettings.m_pGrouper = new CSphGrouperAttr ( tLoc );
4128 				break;
4129 			default:
4130 				sError.SetSprintf ( "invalid group-by mode (mode=%d)", pQuery->m_eGroupFunc );
4131 				return false;
4132 		}
4133 
4134 		tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET );
4135 		tSettings.m_bMva64 = ( eType==SPH_ATTR_INT64SET );
4136 		dGroupColumns.Add ( iGroupBy );
4137 	}
4138 
4139 	// setup distinct attr
4140 	if ( !pQuery->m_sGroupDistinct.IsEmpty() )
4141 	{
4142 		int iDistinct = tSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() );
4143 		if ( iDistinct<0 )
4144 		{
4145 			sError.SetSprintf ( "group-count-distinct attribute '%s' not found", pQuery->m_sGroupDistinct.cstr() );
4146 			return false;
4147 		}
4148 
4149 		tSettings.m_tDistinctLoc = tSchema.GetAttr ( iDistinct ).m_tLocator;
4150 	}
4151 
4152 	return true;
4153 }
4154 
4155 // move expressions used in ORDER BY or WITHIN GROUP ORDER BY to presort phase
FixupDependency(ISphSchema & tSchema,const int * pAttrs,int iAttrCount)4156 static bool FixupDependency ( ISphSchema & tSchema, const int * pAttrs, int iAttrCount )
4157 {
4158 	assert ( pAttrs );
4159 
4160 	CSphVector<int> dCur;
4161 
4162 	// add valid attributes to processing list
4163 	for ( int i=0; i<iAttrCount; i++ )
4164 		if ( pAttrs[i]>=0 )
4165 			dCur.Add ( pAttrs[i] );
4166 
4167 	int iInitialAttrs = dCur.GetLength();
4168 
4169 	// collect columns which affect current expressions
4170 	for ( int i=0; i<dCur.GetLength(); i++ )
4171 	{
4172 		const CSphColumnInfo & tCol = tSchema.GetAttr ( dCur[i] );
4173 		if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr.Ptr()!=NULL )
4174 			tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
4175 	}
4176 
4177 	// get rid of dupes
4178 	dCur.Uniq();
4179 
4180 	// fix up of attributes stages
4181 	ARRAY_FOREACH ( i, dCur )
4182 	{
4183 		int iAttr = dCur[i];
4184 		if ( iAttr<0 )
4185 			continue;
4186 
4187 		CSphColumnInfo & tCol = const_cast < CSphColumnInfo & > ( tSchema.GetAttr ( iAttr ) );
4188 		if ( tCol.m_eStage==SPH_EVAL_FINAL )
4189 			tCol.m_eStage = SPH_EVAL_PRESORT;
4190 	}
4191 
4192 	// it uses attributes if it has dependencies from other attributes
4193 	return ( iInitialAttrs>dCur.GetLength() );
4194 }
4195 
4196 
4197 // expression that transform string pool base + offset -> ptr
4198 struct ExprSortStringAttrFixup_c : public ISphExpr
4199 {
4200 	const BYTE *			m_pStrings; ///< string pool; base for offset of string attributes
4201 	const CSphAttrLocator	m_tLocator; ///< string attribute to fix
4202 
ExprSortStringAttrFixup_cExprSortStringAttrFixup_c4203 	explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
4204 		: m_pStrings ( NULL )
4205 		, m_tLocator ( tLocator )
4206 	{
4207 	}
4208 
EvalExprSortStringAttrFixup_c4209 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
4210 
Int64EvalExprSortStringAttrFixup_c4211 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4212 	{
4213 		SphAttr_t uOff = tMatch.GetAttr ( m_tLocator );
4214 		return (int64_t)( m_pStrings && uOff ? m_pStrings + uOff : NULL );
4215 	}
4216 
CommandExprSortStringAttrFixup_c4217 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4218 	{
4219 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4220 			m_pStrings = (const BYTE*)pArg;
4221 	}
4222 };
4223 
4224 
4225 // expression that transform string pool base + offset -> ptr
4226 struct ExprSortJson2StringPtr_c : public ISphExpr
4227 {
4228 	const BYTE *			m_pStrings; ///< string pool; base for offset of string attributes
4229 	const CSphAttrLocator	m_tJsonCol; ///< JSON attribute to fix
4230 	CSphRefcountedPtr<ISphExpr>	m_pExpr;
4231 
ExprSortJson2StringPtr_cExprSortJson2StringPtr_c4232 	ExprSortJson2StringPtr_c ( const CSphAttrLocator & tLocator, ISphExpr * pExpr )
4233 		: m_pStrings ( NULL )
4234 		, m_tJsonCol ( tLocator )
4235 		, m_pExpr ( pExpr )
4236 	{}
4237 
IsStringPtrExprSortJson2StringPtr_c4238 	virtual bool IsStringPtr () const { return true; }
4239 
EvalExprSortJson2StringPtr_c4240 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
4241 
StringEvalExprSortJson2StringPtr_c4242 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
4243 	{
4244 		if ( !m_pStrings || !m_pExpr )
4245 		{
4246 			*ppStr = NULL;
4247 			return 0;
4248 		}
4249 
4250 		uint64_t uValue = m_pExpr->Int64Eval ( tMatch );
4251 		const BYTE * pVal = m_pStrings + ( uValue & 0xffffffff );
4252 		ESphJsonType eJson = (ESphJsonType)( uValue >> 32 );
4253 		CSphString sVal;
4254 
4255 		// FIXME!!! make string length configurable for STRING and STRING_VECTOR to compare and allocate only Min(String.Length, CMP_LENGTH)
4256 		switch ( eJson )
4257 		{
4258 		case JSON_INT32:
4259 			sVal.SetSprintf ( "%d", sphJsonLoadInt ( &pVal ) );
4260 			break;
4261 		case JSON_INT64:
4262 			sVal.SetSprintf ( INT64_FMT, sphJsonLoadBigint ( &pVal ) );
4263 			break;
4264 		case JSON_DOUBLE:
4265 			sVal.SetSprintf ( "%f", sphQW2D ( sphJsonLoadBigint ( &pVal ) ) );
4266 			break;
4267 		case JSON_STRING:
4268 		{
4269 			int iLen = sphJsonUnpackInt ( &pVal );
4270 			sVal.SetBinary ( (const char *)pVal, iLen );
4271 			break;
4272 		}
4273 		case JSON_STRING_VECTOR:
4274 		{
4275 			int iTotalLen = sphJsonUnpackInt ( &pVal );
4276 			int iCount = sphJsonUnpackInt ( &pVal );
4277 
4278 			CSphFixedVector<BYTE> dBuf ( iTotalLen + 4 ); // data and tail GAP
4279 			BYTE * pDst = dBuf.Begin();
4280 
4281 			// head element
4282 			if ( iCount )
4283 			{
4284 				int iElemLen = sphJsonUnpackInt ( &pVal );
4285 				memcpy ( pDst, pVal, iElemLen );
4286 				pDst += iElemLen;
4287 			}
4288 
4289 			// tail elements separated by space
4290 			for ( int i=1; i<iCount; i++ )
4291 			{
4292 				*pDst++ = ' ';
4293 				int iElemLen = sphJsonUnpackInt ( &pVal );
4294 				memcpy ( pDst, pVal, iElemLen );
4295 				pDst += iElemLen;
4296 			}
4297 
4298 			int iStrLen = pDst-dBuf.Begin();
4299 			// filling junk space
4300 			while ( pDst<dBuf.Begin()+dBuf.GetLength() )
4301 				*pDst++ = '\0';
4302 
4303 			*ppStr = dBuf.LeakData();
4304 			return iStrLen;
4305 		}
4306 		default:
4307 			break;
4308 		}
4309 
4310 		int iStriLen = sVal.Length();
4311 		*ppStr = (const BYTE *)sVal.Leak();
4312 		return iStriLen;
4313 	}
4314 
CommandExprSortJson2StringPtr_c4315 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4316 	{
4317 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4318 		{
4319 			m_pStrings = (const BYTE*)pArg;
4320 			if ( m_pExpr.Ptr() )
4321 				m_pExpr->Command ( eCmd, pArg );
4322 		}
4323 	}
4324 };
4325 
4326 
4327 
sphIsSortStringInternal(const char * sColumnName)4328 bool sphIsSortStringInternal ( const char * sColumnName )
4329 {
4330 	assert ( sColumnName );
4331 	return ( strncmp ( sColumnName, g_sIntAttrPrefix, sizeof(g_sIntAttrPrefix)-1 )==0 );
4332 }
4333 
4334 
4335 // only STRING ( static packed ) and JSON fields mush be remapped
SetupSortRemap(CSphRsetSchema & tSorterSchema,CSphMatchComparatorState & tState)4336 static void SetupSortRemap ( CSphRsetSchema & tSorterSchema, CSphMatchComparatorState & tState )
4337 {
4338 #ifndef NDEBUG
4339 	int iColWasCount = tSorterSchema.GetAttrsCount();
4340 #endif
4341 	for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
4342 	{
4343 		if ( !( tState.m_eKeypart[i]==SPH_KEYPART_STRING || tState.m_tSubKeys[i].m_sKey.cstr() ) )
4344 			continue;
4345 
4346 		assert ( tState.m_dAttrs[i]>=0 && tState.m_dAttrs[i]<iColWasCount );
4347 
4348 		bool bIsJson = !tState.m_tSubKeys[i].m_sKey.IsEmpty();
4349 		bool bIsFunc = bIsJson && tState.m_tSubKeys[i].m_uMask==0;
4350 
4351 		CSphString sRemapCol;
4352 		sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, bIsJson
4353 			? tState.m_tSubKeys[i].m_sKey.cstr()
4354 			: tSorterSchema.GetAttr ( tState.m_dAttrs[i] ).m_sName.cstr() );
4355 
4356 		int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
4357 		if ( iRemap==-1 && bIsJson )
4358 		{
4359 			CSphString sRemapLowercase = sRemapCol;
4360 			sRemapLowercase.ToLower();
4361 			iRemap = tSorterSchema.GetAttrIndex ( sRemapLowercase.cstr() );
4362 		}
4363 
4364 		if ( iRemap==-1 )
4365 		{
4366 			CSphColumnInfo tRemapCol ( sRemapCol.cstr(), bIsJson ? SPH_ATTR_STRINGPTR : SPH_ATTR_BIGINT );
4367 			tRemapCol.m_eStage = SPH_EVAL_PRESORT;
4368 			if ( bIsJson )
4369 				tRemapCol.m_pExpr = bIsFunc ? tState.m_tSubExpr[i] : new ExprSortJson2StringPtr_c ( tState.m_tLocator[i], tState.m_tSubExpr[i] );
4370 			else
4371 				tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( tState.m_tLocator[i] );
4372 
4373 			if ( bIsFunc )
4374 			{
4375 				tRemapCol.m_eAttrType = tState.m_tSubType[i];
4376 				tState.m_eKeypart[i] = Attr2Keypart ( tState.m_tSubType[i] );
4377 			}
4378 
4379 			iRemap = tSorterSchema.GetAttrsCount();
4380 			tSorterSchema.AddDynamicAttr ( tRemapCol );
4381 		}
4382 		tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
4383 	}
4384 }
4385 
4386 
sphSortGetStringRemap(const ISphSchema & tSorterSchema,const ISphSchema & tIndexSchema,CSphVector<SphStringSorterRemap_t> & dAttrs)4387 bool sphSortGetStringRemap ( const ISphSchema & tSorterSchema, const ISphSchema & tIndexSchema,
4388 	CSphVector<SphStringSorterRemap_t> & dAttrs )
4389 {
4390 	dAttrs.Resize ( 0 );
4391 	for ( int i=0; i<tSorterSchema.GetAttrsCount(); i++ )
4392 	{
4393 		const CSphColumnInfo & tDst = tSorterSchema.GetAttr(i);
4394 		// remap only static strings
4395 		if ( !tDst.m_sName.Begins ( g_sIntAttrPrefix ) || tDst.m_eAttrType==SPH_ATTR_STRINGPTR )
4396 			continue;
4397 
4398 		const CSphColumnInfo * pSrcCol = tIndexSchema.GetAttr ( tDst.m_sName.cstr()+sizeof(g_sIntAttrPrefix)-1 );
4399 		if ( !pSrcCol ) // skip internal attributes received from agents
4400 			continue;
4401 
4402 		SphStringSorterRemap_t & tRemap = dAttrs.Add();
4403 		tRemap.m_tSrc = pSrcCol->m_tLocator;
4404 		tRemap.m_tDst = tDst.m_tLocator;
4405 	}
4406 	return ( dAttrs.GetLength()>0 );
4407 }
4408 
4409 ////////////////////
4410 // BINARY COLLATION
4411 ////////////////////
4412 
sphCollateBinary(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4413 int sphCollateBinary ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4414 {
4415 	if ( bPacked )
4416 	{
4417 		int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4418 		int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4419 		int iRes = memcmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
4420 		return iRes ? iRes : ( iLen1-iLen2 );
4421 	} else
4422 	{
4423 		return strcmp ( (const char *)pStr1, (const char *)pStr2 );
4424 	}
4425 }
4426 
4427 ///////////////////////////////
4428 // LIBC_CI, LIBC_CS COLLATIONS
4429 ///////////////////////////////
4430 
4431 /// libc_ci, wrapper for strcasecmp
sphCollateLibcCI(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4432 int sphCollateLibcCI ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4433 {
4434 	if ( bPacked )
4435 	{
4436 		int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4437 		int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4438 		int iRes = strncasecmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
4439 		return iRes ? iRes : ( iLen1-iLen2 );
4440 	} else
4441 	{
4442 		return strcasecmp ( (const char *)pStr1, (const char *)pStr2 );
4443 	}
4444 }
4445 
4446 
4447 /// libc_cs, wrapper for strcoll
sphCollateLibcCS(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4448 int sphCollateLibcCS ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4449 {
4450 	#define COLLATE_STACK_BUFFER 1024
4451 
4452 	if ( bPacked )
4453 	{
4454 		int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4455 		int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4456 
4457 		// strcoll wants asciiz strings, so we would have to copy them over
4458 		// lets use stack buffer for smaller ones, and allocate from heap for bigger ones
4459 		int iRes = 0;
4460 		int iLen = Min ( iLen1, iLen2 );
4461 		if ( iLen<COLLATE_STACK_BUFFER )
4462 		{
4463 			// small strings on stack
4464 			BYTE sBuf1[COLLATE_STACK_BUFFER];
4465 			BYTE sBuf2[COLLATE_STACK_BUFFER];
4466 
4467 			memcpy ( sBuf1, pStr1, iLen );
4468 			memcpy ( sBuf2, pStr2, iLen );
4469 			sBuf1[iLen] = sBuf2[iLen] = '\0';
4470 			iRes = strcoll ( (const char*)sBuf1, (const char*)sBuf2 );
4471 		} else
4472 		{
4473 			// big strings on heap
4474 			char * pBuf1 = new char [ iLen ];
4475 			char * pBuf2 = new char [ iLen ];
4476 
4477 			memcpy ( pBuf1, pStr1, iLen );
4478 			memcpy ( pBuf2, pStr2, iLen );
4479 			pBuf1[iLen] = pBuf2[iLen] = '\0';
4480 			iRes = strcoll ( (const char*)pBuf1, (const char*)pBuf2 );
4481 
4482 			SafeDeleteArray ( pBuf2 );
4483 			SafeDeleteArray ( pBuf1 );
4484 		}
4485 
4486 		return iRes ? iRes : ( iLen1-iLen2 );
4487 	} else
4488 	{
4489 		return strcoll ( (const char *)pStr1, (const char *)pStr2 );
4490 	}
4491 }
4492 
4493 /////////////////////////////
4494 // UTF8_GENERAL_CI COLLATION
4495 /////////////////////////////
4496 
4497 /// 1st level LUT
4498 static unsigned short * g_dCollPlanes_UTF8CI[0x100];
4499 
4500 /// 2nd level LUT, non-trivial collation data
4501 static unsigned short g_dCollWeights_UTF8CI[0xb00] =
4502 {
4503 	// weights for 0x0 to 0x5ff
4504 	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4505 	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4506 	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
4507 	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
4508 	64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4509 	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
4510 	96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4511 	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127,
4512 	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
4513 	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
4514 	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
4515 	176, 177, 178, 179, 180, 924, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
4516 	65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
4517 	208, 78, 79, 79, 79, 79, 79, 215, 216, 85, 85, 85, 85, 89, 222, 83,
4518 	65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
4519 	208, 78, 79, 79, 79, 79, 79, 247, 216, 85, 85, 85, 85, 89, 222, 89,
4520 	65, 65, 65, 65, 65, 65, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
4521 	272, 272, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 71, 71, 71, 71,
4522 	71, 71, 71, 71, 72, 72, 294, 294, 73, 73, 73, 73, 73, 73, 73, 73,
4523 	73, 73, 306, 306, 74, 74, 75, 75, 312, 76, 76, 76, 76, 76, 76, 319,
4524 	319, 321, 321, 78, 78, 78, 78, 78, 78, 329, 330, 330, 79, 79, 79, 79,
4525 	79, 79, 338, 338, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83,
4526 	83, 83, 84, 84, 84, 84, 358, 358, 85, 85, 85, 85, 85, 85, 85, 85,
4527 	85, 85, 85, 85, 87, 87, 89, 89, 89, 90, 90, 90, 90, 90, 90, 83,
4528 	384, 385, 386, 386, 388, 388, 390, 391, 391, 393, 394, 395, 395, 397, 398, 399,
4529 	400, 401, 401, 403, 404, 502, 406, 407, 408, 408, 410, 411, 412, 413, 414, 415,
4530 	79, 79, 418, 418, 420, 420, 422, 423, 423, 425, 426, 427, 428, 428, 430, 85,
4531 	85, 433, 434, 435, 435, 437, 437, 439, 440, 440, 442, 443, 444, 444, 446, 503,
4532 	448, 449, 450, 451, 452, 452, 452, 455, 455, 455, 458, 458, 458, 65, 65, 73,
4533 	73, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 398, 65, 65,
4534 	65, 65, 198, 198, 484, 484, 71, 71, 75, 75, 79, 79, 79, 79, 439, 439,
4535 	74, 497, 497, 497, 71, 71, 502, 503, 78, 78, 65, 65, 198, 198, 216, 216,
4536 	65, 65, 65, 65, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
4537 	82, 82, 82, 82, 85, 85, 85, 85, 83, 83, 84, 84, 540, 540, 72, 72,
4538 	544, 545, 546, 546, 548, 548, 65, 65, 69, 69, 79, 79, 79, 79, 79, 79,
4539 	79, 79, 89, 89, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
4540 	576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591,
4541 	592, 593, 594, 385, 390, 597, 393, 394, 600, 399, 602, 400, 604, 605, 606, 607,
4542 	403, 609, 610, 404, 612, 613, 614, 615, 407, 406, 618, 619, 620, 621, 622, 412,
4543 	624, 625, 413, 627, 628, 415, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639,
4544 	422, 641, 642, 425, 644, 645, 646, 647, 430, 649, 433, 434, 652, 653, 654, 655,
4545 	656, 657, 439, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671,
4546 	672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687,
4547 	688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703,
4548 	704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719,
4549 	720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
4550 	736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751,
4551 	752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767,
4552 	768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
4553 	784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799,
4554 	800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815,
4555 	816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
4556 	832, 833, 834, 835, 836, 921, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847,
4557 	848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863,
4558 	864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
4559 	880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895,
4560 	896, 897, 898, 899, 900, 901, 913, 903, 917, 919, 921, 907, 927, 909, 933, 937,
4561 	921, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
4562 	928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 921, 933, 913, 917, 919, 921,
4563 	933, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
4564 	928, 929, 931, 931, 932, 933, 934, 935, 936, 937, 921, 933, 927, 933, 937, 975,
4565 	914, 920, 978, 978, 978, 934, 928, 983, 984, 985, 986, 986, 988, 988, 990, 990,
4566 	992, 992, 994, 994, 996, 996, 998, 998, 1000, 1000, 1002, 1002, 1004, 1004, 1006, 1006,
4567 	922, 929, 931, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
4568 	1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
4569 	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
4570 	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
4571 	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
4572 	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
4573 	1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
4574 	1120, 1120, 1122, 1122, 1124, 1124, 1126, 1126, 1128, 1128, 1130, 1130, 1132, 1132, 1134, 1134,
4575 	1136, 1136, 1138, 1138, 1140, 1140, 1140, 1140, 1144, 1144, 1146, 1146, 1148, 1148, 1150, 1150,
4576 	1152, 1152, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1164, 1166, 1166,
4577 	1168, 1168, 1170, 1170, 1172, 1172, 1174, 1174, 1176, 1176, 1178, 1178, 1180, 1180, 1182, 1182,
4578 	1184, 1184, 1186, 1186, 1188, 1188, 1190, 1190, 1192, 1192, 1194, 1194, 1196, 1196, 1198, 1198,
4579 	1200, 1200, 1202, 1202, 1204, 1204, 1206, 1206, 1208, 1208, 1210, 1210, 1212, 1212, 1214, 1214,
4580 	1216, 1046, 1046, 1219, 1219, 1221, 1222, 1223, 1223, 1225, 1226, 1227, 1227, 1229, 1230, 1231,
4581 	1040, 1040, 1040, 1040, 1236, 1236, 1045, 1045, 1240, 1240, 1240, 1240, 1046, 1046, 1047, 1047,
4582 	1248, 1248, 1048, 1048, 1048, 1048, 1054, 1054, 1256, 1256, 1256, 1256, 1069, 1069, 1059, 1059,
4583 	1059, 1059, 1059, 1059, 1063, 1063, 1270, 1271, 1067, 1067, 1274, 1275, 1276, 1277, 1278, 1279,
4584 	1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295,
4585 	1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311,
4586 	1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327,
4587 	1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
4588 	1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
4589 	1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375,
4590 	1376, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
4591 	1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
4592 	1360, 1361, 1362, 1363, 1364, 1365, 1366, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423,
4593 	1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439,
4594 	1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455,
4595 	1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
4596 	1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487,
4597 	1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
4598 	1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519,
4599 	1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535,
4600 
4601 	// weights for codepoints 0x1e00 to 0x1fff
4602 	65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68,
4603 	68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
4604 	71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73,
4605 	75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77,
4606 	77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79,
4607 	79, 79, 79, 79, 80, 80, 80, 80, 82, 82, 82, 82, 82, 82, 82, 82,
4608 	83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84,
4609 	84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86,
4610 	87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 89, 89,
4611 	90, 90, 90, 90, 90, 90, 72, 84, 87, 89, 7834, 83, 7836, 7837, 7838, 7839,
4612 	65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
4613 	65, 65, 65, 65, 65, 65, 65, 65, 69, 69, 69, 69, 69, 69, 69, 69,
4614 	69, 69, 69, 69, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
4615 	79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
4616 	79, 79, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
4617 	85, 85, 89, 89, 89, 89, 89, 89, 89, 89, 7930, 7931, 7932, 7933, 7934, 7935,
4618 	913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
4619 	917, 917, 917, 917, 917, 917, 7958, 7959, 917, 917, 917, 917, 917, 917, 7966, 7967,
4620 	919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
4621 	921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921,
4622 	927, 927, 927, 927, 927, 927, 8006, 8007, 927, 927, 927, 927, 927, 927, 8014, 8015,
4623 	933, 933, 933, 933, 933, 933, 933, 933, 8024, 933, 8026, 933, 8028, 933, 8030, 933,
4624 	937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
4625 	913, 8123, 917, 8137, 919, 8139, 921, 8155, 927, 8185, 933, 8171, 937, 8187, 8062, 8063,
4626 	913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
4627 	919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
4628 	937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
4629 	913, 913, 913, 913, 913, 8117, 913, 913, 913, 913, 913, 8123, 913, 8125, 921, 8127,
4630 	8128, 8129, 919, 919, 919, 8133, 919, 919, 917, 8137, 919, 8139, 919, 8141, 8142, 8143,
4631 	921, 921, 921, 8147, 8148, 8149, 921, 921, 921, 921, 921, 8155, 8156, 8157, 8158, 8159,
4632 	933, 933, 933, 8163, 929, 929, 933, 933, 933, 933, 933, 8171, 929, 8173, 8174, 8175,
4633 	8176, 8177, 937, 937, 937, 8181, 937, 937, 927, 8185, 937, 8187, 937, 8189, 8190, 8191
4634 
4635 	// space for codepoints 0x21xx, 0x24xx, 0xffxx (generated)
4636 };
4637 
4638 
4639 /// initialize collation LUTs
sphCollationInit()4640 void sphCollationInit()
4641 {
4642 	const int dWeightPlane[0x0b] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x1e, 0x1f, 0x21, 0x24, 0xff };
4643 
4644 	// generate missing weights
4645 	for ( int i=0; i<0x100; i++ )
4646 	{
4647 		g_dCollWeights_UTF8CI[i+0x800] = (unsigned short)( 0x2100 + i - ( i>=0x70 && i<=0x7f )*16 ); // 2170..217f, -16
4648 		g_dCollWeights_UTF8CI[i+0x900] = (unsigned short)( 0x2400 + i - ( i>=0xd0 && i<=0xe9 )*26 ); // 24d0..24e9, -26
4649 		g_dCollWeights_UTF8CI[i+0xa00] = (unsigned short)( 0xff00 + i - ( i>=0x41 && i<=0x5a )*32 ); // ff41..ff5a, -32
4650 	}
4651 
4652 	// generate planes table
4653 	for ( int i=0; i<0x100; i++ )
4654 		g_dCollPlanes_UTF8CI[i] = NULL;
4655 
4656 	for ( int i=0; i<0x0b; i++ )
4657 		g_dCollPlanes_UTF8CI [ dWeightPlane[i] ] = g_dCollWeights_UTF8CI + 0x100*i;
4658 }
4659 
4660 
4661 /// collate a single codepoint
CollateUTF8CI(int iCode)4662 static inline int CollateUTF8CI ( int iCode )
4663 {
4664 	return ( ( iCode>>16 ) || !g_dCollPlanes_UTF8CI [ iCode>>8 ] )
4665 		? iCode
4666 		: g_dCollPlanes_UTF8CI [ iCode>>8 ][ iCode&0xff ];
4667 }
4668 
4669 
4670 /// utf8_general_ci
sphCollateUtf8GeneralCI(const BYTE * pArg1,const BYTE * pArg2,bool bPacked)4671 int sphCollateUtf8GeneralCI ( const BYTE * pArg1, const BYTE * pArg2, bool bPacked )
4672 {
4673 	const BYTE * pStr1 = pArg1;
4674 	const BYTE * pStr2 = pArg2;
4675 	const BYTE * pMax1 = NULL;
4676 	const BYTE * pMax2 = NULL;
4677 	if ( bPacked )
4678 	{
4679 		int iLen1 = sphUnpackStr ( pStr1, (const BYTE**)&pStr1 );
4680 		int iLen2 = sphUnpackStr ( pStr2, (const BYTE**)&pStr2 );
4681 		pMax1 = pStr1 + iLen1;
4682 		pMax2 = pStr2 + iLen2;
4683 	}
4684 
4685 	while ( ( bPacked && pStr1<pMax1 && pStr2<pMax2 ) || ( !bPacked && *pStr1 && *pStr2 ) )
4686 	{
4687 		// FIXME! on broken data, decode might go beyond buffer bounds
4688 		int iCode1 = sphUTF8Decode ( pStr1 );
4689 		int iCode2 = sphUTF8Decode ( pStr2 );
4690 		if ( !iCode1 && !iCode2 )
4691 			return 0;
4692 		if ( !iCode1 || !iCode2 )
4693 			return !iCode1 ? -1 : 1;
4694 
4695 		if ( iCode1==iCode2 )
4696 			continue;
4697 		iCode1 = CollateUTF8CI ( iCode1 );
4698 		iCode2 = CollateUTF8CI ( iCode2 );
4699 		if ( iCode1!=iCode2 )
4700 			return iCode1-iCode2;
4701 	}
4702 
4703 	if ( bPacked )
4704 	{
4705 		if ( pStr1>=pMax1 && pStr2>=pMax2 )
4706 			return 0;
4707 		return ( pStr1<pMax1 ) ? 1 : -1;
4708 	} else
4709 	{
4710 		if ( !*pStr1 && !*pStr2 )
4711 			return 0;
4712 		return ( *pStr1 ? 1 : -1 );
4713 	}
4714 }
4715 
4716 
4717 /////////////////////////////
4718 // hashing functions
4719 /////////////////////////////
4720 
4721 
4722 class LibcCSHash_fn
4723 {
4724 public:
4725 	mutable CSphTightVector<BYTE> m_dBuf;
4726 	static const int LOCALE_SAFE_GAP = 16;
4727 
LibcCSHash_fn()4728 	LibcCSHash_fn()
4729 	{
4730 		m_dBuf.Resize ( COLLATE_STACK_BUFFER );
4731 	}
4732 
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4733 	uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4734 	{
4735 		assert ( pStr && iLen );
4736 
4737 		int iCompositeLen = iLen + 1 + (int)( 3.0f * iLen ) + LOCALE_SAFE_GAP;
4738 		if ( m_dBuf.GetLength()<iCompositeLen )
4739 			m_dBuf.Resize ( iCompositeLen );
4740 
4741 		memcpy ( m_dBuf.Begin(), pStr, iLen );
4742 		m_dBuf[iLen] = '\0';
4743 
4744 		BYTE * pDst = m_dBuf.Begin()+iLen+1;
4745 		int iDstAvailable = m_dBuf.GetLength() - iLen - LOCALE_SAFE_GAP;
4746 
4747 		int iDstLen = strxfrm ( (char *)pDst, (const char *)m_dBuf.Begin(), iDstAvailable );
4748 		assert ( iDstLen<iDstAvailable+LOCALE_SAFE_GAP );
4749 
4750 		uint64_t uAcc = sphFNV64 ( pDst, iDstLen, uPrev );
4751 
4752 		return uAcc;
4753 	}
4754 };
4755 
4756 
4757 class LibcCIHash_fn
4758 {
4759 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4760 	uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4761 	{
4762 		assert ( pStr && iLen );
4763 
4764 		uint64_t uAcc = uPrev;
4765 		while ( iLen-- )
4766 		{
4767 			int iChar = tolower ( *pStr++ );
4768 			uAcc = sphFNV64 ( &iChar, 4, uAcc );
4769 		}
4770 
4771 		return uAcc;
4772 	}
4773 };
4774 
4775 
4776 class Utf8CIHash_fn
4777 {
4778 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4779 	uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4780 	{
4781 		assert ( pStr && iLen );
4782 
4783 		uint64_t uAcc = uPrev;
4784 		while ( iLen-- )
4785 		{
4786 			const BYTE * pCur = pStr++;
4787 			int iCode = sphUTF8Decode ( pCur );
4788 			iCode = CollateUTF8CI ( iCode );
4789 			uAcc = sphFNV64 ( &iCode, 4, uAcc );
4790 		}
4791 
4792 		return uAcc;
4793 	}
4794 };
4795 
4796 
sphCreateGrouperString(const CSphAttrLocator & tLoc,ESphCollation eCollation)4797 CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation )
4798 {
4799 	if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
4800 		return new CSphGrouperString<Utf8CIHash_fn> ( tLoc );
4801 	else if ( eCollation==SPH_COLLATION_LIBC_CI )
4802 		return new CSphGrouperString<LibcCIHash_fn> ( tLoc );
4803 	else if ( eCollation==SPH_COLLATION_LIBC_CS )
4804 		return new CSphGrouperString<LibcCSHash_fn> ( tLoc );
4805 	else
4806 		return new CSphGrouperString<BinaryHash_fn> ( tLoc );
4807 }
4808 
sphCreateGrouperMulti(const CSphVector<CSphAttrLocator> & dLocators,const CSphVector<ESphAttr> & dAttrTypes,const CSphVector<ISphExpr * > & dJsonKeys,ESphCollation eCollation)4809 CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
4810 									const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation )
4811 {
4812 	if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
4813 		return new CSphGrouperMulti<Utf8CIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4814 	else if ( eCollation==SPH_COLLATION_LIBC_CI )
4815 		return new CSphGrouperMulti<LibcCIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4816 	else if ( eCollation==SPH_COLLATION_LIBC_CS )
4817 		return new CSphGrouperMulti<LibcCSHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4818 	else
4819 		return new CSphGrouperMulti<BinaryHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4820 }
4821 
4822 
4823 /////////////////////////
4824 // SORTING QUEUE FACTORY
4825 /////////////////////////
4826 
4827 template < typename COMP >
CreatePlainSorter(bool bKbuffer,int iMaxMatches,bool bUsesAttrs,bool bFactors)4828 static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
4829 {
4830 	if ( bKbuffer )
4831 	{
4832 		if ( bFactors )
4833 			return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
4834 		else
4835 			return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
4836 	} else
4837 	{
4838 		if ( bFactors )
4839 			return new CSphMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
4840 		else
4841 			return new CSphMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
4842 	}
4843 }
4844 
4845 
CreatePlainSorter(ESphSortFunc eMatchFunc,bool bKbuffer,int iMaxMatches,bool bUsesAttrs,bool bFactors)4846 static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
4847 {
4848 	switch ( eMatchFunc )
4849 	{
4850 		case FUNC_REL_DESC:		return CreatePlainSorter<MatchRelevanceLt_fn>	( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4851 		case FUNC_ATTR_DESC:	return CreatePlainSorter<MatchAttrLt_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4852 		case FUNC_ATTR_ASC:		return CreatePlainSorter<MatchAttrGt_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4853 		case FUNC_TIMESEGS:		return CreatePlainSorter<MatchTimeSegments_fn>	( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4854 		case FUNC_GENERIC2:		return CreatePlainSorter<MatchGeneric2_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4855 		case FUNC_GENERIC3:		return CreatePlainSorter<MatchGeneric3_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4856 		case FUNC_GENERIC4:		return CreatePlainSorter<MatchGeneric4_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4857 		case FUNC_GENERIC5:		return CreatePlainSorter<MatchGeneric5_fn>		( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4858 		case FUNC_EXPR:			return CreatePlainSorter<MatchExpr_fn>			( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4859 		default:				return NULL;
4860 	}
4861 }
4862 
4863 
ExtraAddSortkeys(CSphSchema * pExtra,const ISphSchema & tSorterSchema,const int * dAttrs)4864 static void ExtraAddSortkeys ( CSphSchema * pExtra, const ISphSchema & tSorterSchema, const int * dAttrs )
4865 {
4866 	if ( pExtra )
4867 		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
4868 			if ( dAttrs[i]>=0 )
4869 				pExtra->AddAttr ( tSorterSchema.GetAttr ( dAttrs[i] ), true );
4870 }
4871 
4872 
sphCreateQueue(SphQueueSettings_t & tQueue)4873 ISphMatchSorter * sphCreateQueue ( SphQueueSettings_t & tQueue )
4874 {
4875 	// prepare for descent
4876 	ISphMatchSorter * pTop = NULL;
4877 	CSphMatchComparatorState tStateMatch, tStateGroup;
4878 
4879 	// short-cuts
4880 	const CSphQuery * pQuery = &tQueue.m_tQuery;
4881 	const ISphSchema & tSchema = tQueue.m_tSchema;
4882 	CSphString & sError = tQueue.m_sError;
4883 	CSphQueryProfile * pProfiler = tQueue.m_pProfiler;
4884 	CSphSchema * pExtra = tQueue.m_pExtra;
4885 
4886 	sError = "";
4887 	bool bHasZonespanlist = false;
4888 	bool bNeedZonespanlist = false;
4889 	DWORD uPackedFactorFlags = SPH_FACTOR_DISABLE;
4890 
4891 	///////////////////////////////////////
4892 	// build incoming and outgoing schemas
4893 	///////////////////////////////////////
4894 
4895 	// sorter schema
4896 	// adds computed expressions and groupby stuff on top of the original index schema
4897 	CSphRsetSchema tSorterSchema;
4898 	tSorterSchema = tSchema;
4899 
4900 	CSphVector<uint64_t> dQueryAttrs;
4901 
4902 	// we need this to perform a sanity check
4903 	bool bHasGroupByExpr = false;
4904 
4905 	// setup overrides, detach them into dynamic part
4906 	ARRAY_FOREACH ( i, pQuery->m_dOverrides )
4907 	{
4908 		const char * sAttr = pQuery->m_dOverrides[i].m_sAttr.cstr();
4909 
4910 		int iIndex = tSorterSchema.GetAttrIndex ( sAttr );
4911 		if ( iIndex<0 )
4912 		{
4913 			sError.SetSprintf ( "override attribute '%s' not found", sAttr );
4914 			return NULL;
4915 		}
4916 
4917 		CSphColumnInfo tCol;
4918 		tCol = tSorterSchema.GetAttr ( iIndex );
4919 		tCol.m_eStage = SPH_EVAL_OVERRIDE;
4920 		tSorterSchema.AddDynamicAttr ( tCol );
4921 		if ( pExtra )
4922 			pExtra->AddAttr ( tCol, true );
4923 		tSorterSchema.RemoveStaticAttr ( iIndex );
4924 
4925 		dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4926 	}
4927 
4928 	// setup @geodist
4929 	if ( pQuery->m_bGeoAnchor && tSorterSchema.GetAttrIndex ( "@geodist" )<0 )
4930 	{
4931 		ExprGeodist_t * pExpr = new ExprGeodist_t ();
4932 		if ( !pExpr->Setup ( pQuery, tSorterSchema, sError ) )
4933 		{
4934 			pExpr->Release ();
4935 			return NULL;
4936 		}
4937 		CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
4938 		tCol.m_pExpr = pExpr; // takes ownership, no need to for explicit pExpr release
4939 		tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
4940 		tSorterSchema.AddDynamicAttr ( tCol );
4941 		if ( pExtra )
4942 			pExtra->AddAttr ( tCol, true );
4943 
4944 		dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4945 	}
4946 
4947 	// setup @expr
4948 	if ( pQuery->m_eSort==SPH_SORT_EXPR && tSorterSchema.GetAttrIndex ( "@expr" )<0 )
4949 	{
4950 		CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility (ie. too lazy to fix those tests right now)
4951 		tCol.m_pExpr = sphExprParse ( pQuery->m_sSortBy.cstr(), tSorterSchema, NULL, NULL, sError, pProfiler, pQuery->m_eCollation, NULL, &bHasZonespanlist );
4952 		bNeedZonespanlist |= bHasZonespanlist;
4953 		if ( !tCol.m_pExpr )
4954 			return NULL;
4955 		tCol.m_eStage = SPH_EVAL_PRESORT;
4956 		tSorterSchema.AddDynamicAttr ( tCol );
4957 
4958 		dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4959 	}
4960 
4961 	// expressions from select items
4962 	bool bHasCount = false;
4963 
4964 	if ( tQueue.m_bComputeItems )
4965 		ARRAY_FOREACH ( iItem, pQuery->m_dItems )
4966 	{
4967 		const CSphQueryItem & tItem = pQuery->m_dItems[iItem];
4968 		const CSphString & sExpr = tItem.m_sExpr;
4969 		bool bIsCount = IsCount(sExpr);
4970 		bHasCount |= bIsCount;
4971 
4972 		if ( sExpr=="*" )
4973 		{
4974 			for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
4975 				dQueryAttrs.Add ( sphFNV64 ( tSchema.GetAttr(i).m_sName.cstr() ) );
4976 		}
4977 
4978 		// for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
4979 		int iAttrIdx = tSchema.GetAttrIndex ( sExpr.cstr() );
4980 		bool bPlainAttr = ( ( sExpr=="*" || ( iAttrIdx>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE ) ) &&
4981 							( tItem.m_sAlias.IsEmpty() || tItem.m_sAlias==tItem.m_sExpr ) );
4982 		if ( iAttrIdx>=0 )
4983 		{
4984 			ESphAttr eAttr = tSchema.GetAttr ( iAttrIdx ).m_eAttrType;
4985 			if ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET )
4986 			{
4987 				if ( tItem.m_eAggrFunc!=SPH_AGGR_NONE )
4988 				{
4989 					sError.SetSprintf ( "can not aggregate non-scalar attribute '%s'", tItem.m_sExpr.cstr() );
4990 					return NULL;
4991 				}
4992 
4993 				if ( !bPlainAttr )
4994 				{
4995 					bPlainAttr = true;
4996 					for ( int i=0; i<iItem && bPlainAttr; i++ )
4997 						if ( sExpr==pQuery->m_dItems[i].m_sAlias )
4998 							bPlainAttr = false;
4999 				}
5000 			}
5001 		}
5002 
5003 		if ( bPlainAttr || IsGroupby ( sExpr ) || bIsCount )
5004 		{
5005 			bHasGroupByExpr = IsGroupby ( sExpr );
5006 			continue;
5007 		}
5008 
5009 		// not an attribute? must be an expression, and must be aliased by query parser
5010 		assert ( !tItem.m_sAlias.IsEmpty() );
5011 
5012 		// tricky part
5013 		// we might be fed with precomputed matches, but it's all or nothing
5014 		// the incoming match either does not have anything computed, or it has everything
5015 		int iSorterAttr = tSorterSchema.GetAttrIndex ( tItem.m_sAlias.cstr() );
5016 		if ( iSorterAttr>=0 )
5017 		{
5018 			if ( dQueryAttrs.Contains ( sphFNV64 ( tItem.m_sAlias.cstr() ) ) )
5019 			{
5020 				sError.SetSprintf ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
5021 				return NULL;
5022 			} else
5023 			{
5024 				tSorterSchema.RemoveStaticAttr ( iSorterAttr );
5025 			}
5026 		}
5027 
5028 		// a new and shiny expression, lets parse
5029 		CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
5030 		DWORD uQueryPackedFactorFlags = SPH_FACTOR_DISABLE;
5031 
5032 		// tricky bit
5033 		// GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
5034 		// and then the aggregate operation simply concatenates strings as matches arrive
5035 		// ideally, we would instead pass ownership of the expression to G_C() implementation
5036 		// and also the original expression type, and let the string conversion happen in G_C() itself
5037 		// but that ideal route seems somewhat more complicated in the current architecture
5038 		if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
5039 		{
5040 			CSphString sExpr2;
5041 			sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
5042 			tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
5043 				&tExprCol.m_bWeight, sError, pProfiler, pQuery->m_eCollation, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
5044 		} else
5045 		{
5046 			tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
5047 				&tExprCol.m_bWeight, sError, pProfiler, pQuery->m_eCollation, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
5048 		}
5049 
5050 		uPackedFactorFlags |= uQueryPackedFactorFlags;
5051 		bNeedZonespanlist |= bHasZonespanlist;
5052 		tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
5053 		if ( !tExprCol.m_pExpr )
5054 		{
5055 			sError.SetSprintf ( "parse error: %s", sError.cstr() );
5056 			return NULL;
5057 		}
5058 
5059 		// force AVG() to be computed in floats
5060 		if ( tExprCol.m_eAggrFunc==SPH_AGGR_AVG )
5061 		{
5062 			tExprCol.m_eAttrType = SPH_ATTR_FLOAT;
5063 			tExprCol.m_tLocator.m_iBitCount = 32;
5064 		}
5065 
5066 		// force explicit type conversion for JSON attributes
5067 		if ( tExprCol.m_eAggrFunc!=SPH_AGGR_NONE && tExprCol.m_eAttrType==SPH_ATTR_JSON_FIELD )
5068 		{
5069 			sError.SetSprintf ( "ambiguous attribute type '%s', use INTEGER(), BIGINT() or DOUBLE() conversion functions", tItem.m_sExpr.cstr() );
5070 			return NULL;
5071 		}
5072 
5073 		if ( uQueryPackedFactorFlags & SPH_FACTOR_JSON_OUT )
5074 			tExprCol.m_eAttrType = SPH_ATTR_FACTORS_JSON;
5075 
5076 		// force GROUP_CONCAT() to be computed as strings
5077 		if ( tExprCol.m_eAggrFunc==SPH_AGGR_CAT )
5078 		{
5079 			tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
5080 			tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
5081 		}
5082 
5083 		// postpone aggregates, add non-aggregates
5084 		if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
5085 		{
5086 			// is this expression used in filter?
5087 			// OPTIMIZE? hash filters and do hash lookups?
5088 			if ( tExprCol.m_eAttrType!=SPH_ATTR_JSON_FIELD )
5089 			ARRAY_FOREACH ( i, pQuery->m_dFilters )
5090 				if ( pQuery->m_dFilters[i].m_sAttrName==tExprCol.m_sName )
5091 			{
5092 				// is this a hack?
5093 				// m_bWeight is computed after EarlyReject() get called
5094 				// that means we can't evaluate expressions with WEIGHT() in prefilter phase
5095 				if ( tExprCol.m_bWeight )
5096 				{
5097 					tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
5098 					break;
5099 				}
5100 
5101 				// so we are about to add a filter condition
5102 				// but it might depend on some preceding columns (e.g. SELECT 1+attr f1 ... WHERE f1>5)
5103 				// lets detect those and move them to prefilter \ presort phase too
5104 				CSphVector<int> dCur;
5105 				tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5106 
5107 				// usual filter
5108 				tExprCol.m_eStage = SPH_EVAL_PREFILTER;
5109 				ARRAY_FOREACH ( j, dCur )
5110 				{
5111 					const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
5112 					if ( tCol.m_bWeight )
5113 					{
5114 						tExprCol.m_eStage = SPH_EVAL_PRESORT;
5115 						tExprCol.m_bWeight = true;
5116 					}
5117 					// handle chains of dependencies (e.g. SELECT 1+attr f1, f1-1 f2 ... WHERE f2>5)
5118 					if ( tCol.m_pExpr.Ptr() )
5119 					{
5120 						tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5121 					}
5122 				}
5123 				dCur.Uniq();
5124 
5125 				ARRAY_FOREACH ( j, dCur )
5126 				{
5127 					CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
5128 					if ( tDep.m_eStage>tExprCol.m_eStage )
5129 						tDep.m_eStage = tExprCol.m_eStage;
5130 				}
5131 				break;
5132 			}
5133 
5134 			// add it!
5135 			// NOTE, "final" stage might need to be fixed up later
5136 			// we'll do that when parsing sorting clause
5137 			tSorterSchema.AddDynamicAttr ( tExprCol );
5138 		} else // some aggregate
5139 		{
5140 			tExprCol.m_eStage = SPH_EVAL_PRESORT; // sorter expects computed expression
5141 			tSorterSchema.AddDynamicAttr ( tExprCol );
5142 			if ( pExtra )
5143 				pExtra->AddAttr ( tExprCol, true );
5144 
5145 			/// update aggregate dependencies (e.g. SELECT 1+attr f1, min(f1), ...)
5146 			CSphVector<int> dCur;
5147 			tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5148 
5149 			ARRAY_FOREACH ( j, dCur )
5150 			{
5151 				const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
5152 				if ( tCol.m_pExpr.Ptr() )
5153 					tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5154 			}
5155 			dCur.Uniq();
5156 
5157 			ARRAY_FOREACH ( j, dCur )
5158 			{
5159 				CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
5160 				if ( tDep.m_eStage>tExprCol.m_eStage )
5161 					tDep.m_eStage = tExprCol.m_eStage;
5162 			}
5163 		}
5164 		dQueryAttrs.Add ( sphFNV64 ( (const BYTE *)tExprCol.m_sName.cstr() ) );
5165 	}
5166 
5167 	////////////////////////////////////////////
5168 	// setup groupby settings, create shortcuts
5169 	////////////////////////////////////////////
5170 
5171 	CSphVector<int> dGroupColumns;
5172 	CSphGroupSorterSettings tSettings;
5173 	bool bImplicit = false;
5174 
5175 	if ( pQuery->m_sGroupBy.IsEmpty() )
5176 		ARRAY_FOREACH_COND ( i, pQuery->m_dItems, !bImplicit )
5177 		{
5178 			const CSphQueryItem & t = pQuery->m_dItems[i];
5179 			bImplicit = ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
5180 		}
5181 
5182 	if ( !SetupGroupbySettings ( pQuery, tSorterSchema, tSettings, dGroupColumns, sError, bImplicit ) )
5183 		return NULL;
5184 
5185 	const bool bGotGroupby = !pQuery->m_sGroupBy.IsEmpty() || tSettings.m_bImplicit; // or else, check in SetupGroupbySettings() would already fail
5186 	const bool bGotDistinct = ( tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
5187 
5188 	if ( bHasGroupByExpr && !bGotGroupby )
5189 	{
5190 		sError = "GROUPBY() is allowed only in GROUP BY queries";
5191 		return NULL;
5192 	}
5193 
5194 	// check for HAVING constrains
5195 	if ( tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() )
5196 	{
5197 		if ( !bGotGroupby )
5198 		{
5199 			sError.SetSprintf ( "can not use HAVING without GROUP BY" );
5200 			return NULL;
5201 		}
5202 
5203 		// should be column named at group by or it's alias or aggregate
5204 		const CSphString & sHaving = tQueue.m_pAggrFilter->m_sAttrName;
5205 		if ( !IsGroupbyMagic ( sHaving ) )
5206 		{
5207 			bool bValidHaving = false;
5208 			ARRAY_FOREACH ( i, pQuery->m_dItems )
5209 			{
5210 				const CSphQueryItem & tItem = pQuery->m_dItems[i];
5211 				if ( tItem.m_sAlias!=sHaving )
5212 					continue;
5213 
5214 				bValidHaving = ( IsGroupbyMagic ( tItem.m_sExpr ) || tItem.m_eAggrFunc!=SPH_AGGR_NONE );
5215 				break;
5216 			}
5217 
5218 			if ( !bValidHaving )
5219 			{
5220 				sError.SetSprintf ( "can not use HAVING with attribute not related to GROUP BY" );
5221 				return NULL;
5222 			}
5223 		}
5224 	}
5225 
5226 	// now lets add @groupby etc if needed
5227 	if ( bGotGroupby && tSorterSchema.GetAttrIndex ( "@groupby" )<0 )
5228 	{
5229 		ESphAttr eGroupByResult = ( !tSettings.m_bImplicit ) ? tSettings.m_pGrouper->GetResultType() : SPH_ATTR_INTEGER; // implicit do not have grouper
5230 		if ( tSettings.m_bMva64 )
5231 			eGroupByResult = SPH_ATTR_BIGINT;
5232 
5233 		CSphColumnInfo tGroupby ( "@groupby", eGroupByResult );
5234 		CSphColumnInfo tCount ( "@count", SPH_ATTR_INTEGER );
5235 		CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
5236 
5237 		tGroupby.m_eStage = SPH_EVAL_SORTER;
5238 		tCount.m_eStage = SPH_EVAL_SORTER;
5239 		tDistinct.m_eStage = SPH_EVAL_SORTER;
5240 
5241 		tSorterSchema.AddDynamicAttr ( tGroupby );
5242 		tSorterSchema.AddDynamicAttr ( tCount );
5243 		if ( pExtra )
5244 		{
5245 			pExtra->AddAttr ( tGroupby, true );
5246 			pExtra->AddAttr ( tCount, true );
5247 		}
5248 		if ( bGotDistinct )
5249 		{
5250 			tSorterSchema.AddDynamicAttr ( tDistinct );
5251 			if ( pExtra )
5252 				pExtra->AddAttr ( tDistinct, true );
5253 		}
5254 
5255 		// add @groupbystr last in case we need to skip it on sending (like @int_str2ptr_*)
5256 		if ( tSettings.m_bJson )
5257 		{
5258 			CSphColumnInfo tGroupbyStr ( "@groupbystr", SPH_ATTR_JSON_FIELD );
5259 			tGroupbyStr.m_eStage = SPH_EVAL_SORTER;
5260 			tSorterSchema.AddDynamicAttr ( tGroupbyStr );
5261 		}
5262 	}
5263 
5264 #define LOC_CHECK(_cond,_msg) if (!(_cond)) { sError = "invalid schema: " _msg; return NULL; }
5265 
5266 	int iGroupby = tSorterSchema.GetAttrIndex ( "@groupby" );
5267 	if ( iGroupby>=0 )
5268 	{
5269 		tSettings.m_bDistinct = bGotDistinct;
5270 		tSettings.m_tLocGroupby = tSorterSchema.GetAttr ( iGroupby ).m_tLocator;
5271 		LOC_CHECK ( tSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );
5272 
5273 		int iCount = tSorterSchema.GetAttrIndex ( "@count" );
5274 		LOC_CHECK ( iCount>=0, "missing @count" );
5275 
5276 		tSettings.m_tLocCount = tSorterSchema.GetAttr ( iCount ).m_tLocator;
5277 		LOC_CHECK ( tSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );
5278 
5279 		int iDistinct = tSorterSchema.GetAttrIndex ( "@distinct" );
5280 		if ( bGotDistinct )
5281 		{
5282 			LOC_CHECK ( iDistinct>=0, "missing @distinct" );
5283 			tSettings.m_tLocDistinct = tSorterSchema.GetAttr ( iDistinct ).m_tLocator;
5284 			LOC_CHECK ( tSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
5285 		} else
5286 		{
5287 			LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
5288 		}
5289 
5290 		int iGroupbyStr = tSorterSchema.GetAttrIndex ( "@groupbystr" );
5291 		if ( iGroupbyStr>=0 )
5292 			tSettings.m_tLocGroupbyStr = tSorterSchema.GetAttr ( iGroupbyStr ).m_tLocator;
5293 	}
5294 
5295 	if ( bHasCount )
5296 	{
5297 		LOC_CHECK ( tSorterSchema.GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );
5298 	}
5299 
5300 #undef LOC_CHECK
5301 
5302 	////////////////////////////////////
5303 	// choose and setup sorting functor
5304 	////////////////////////////////////
5305 
5306 	ESphSortFunc eMatchFunc = FUNC_REL_DESC;
5307 	ESphSortFunc eGroupFunc = FUNC_REL_DESC;
5308 	bool bUsesAttrs = false;
5309 	bool bRandomize = false;
5310 
5311 	// matches sorting function
5312 	if ( pQuery->m_eSort==SPH_SORT_EXTENDED )
5313 	{
5314 		ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sSortBy.cstr(),
5315 			tSorterSchema, eMatchFunc, tStateMatch, sError );
5316 
5317 		if ( eRes==SORT_CLAUSE_ERROR )
5318 			return NULL;
5319 
5320 		if ( eRes==SORT_CLAUSE_RANDOM )
5321 			bRandomize = true;
5322 
5323 		ExtraAddSortkeys ( pExtra, tSorterSchema, tStateMatch.m_dAttrs );
5324 
5325 		bUsesAttrs = FixupDependency ( tSorterSchema, tStateMatch.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
5326 		if ( !bUsesAttrs )
5327 		{
5328 			for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
5329 			{
5330 				ESphSortKeyPart ePart = tStateMatch.m_eKeypart[i];
5331 				if ( ePart==SPH_KEYPART_INT || ePart==SPH_KEYPART_FLOAT || ePart==SPH_KEYPART_STRING || ePart==SPH_KEYPART_STRINGPTR )
5332 					bUsesAttrs = true;
5333 			}
5334 		}
5335 		SetupSortRemap ( tSorterSchema, tStateMatch );
5336 
5337 	} else if ( pQuery->m_eSort==SPH_SORT_EXPR )
5338 	{
5339 		tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
5340 		tStateMatch.m_tLocator[0] = tSorterSchema.GetAttr ( tSorterSchema.GetAttrIndex ( "@expr" ) ).m_tLocator;
5341 		tStateMatch.m_eKeypart[1] = SPH_KEYPART_ID;
5342 		tStateMatch.m_uAttrDesc = 1;
5343 		eMatchFunc = FUNC_EXPR;
5344 		bUsesAttrs = true;
5345 
5346 	} else
5347 	{
5348 		// check sort-by attribute
5349 		if ( pQuery->m_eSort!=SPH_SORT_RELEVANCE )
5350 		{
5351 			int iSortAttr = tSorterSchema.GetAttrIndex ( pQuery->m_sSortBy.cstr() );
5352 			if ( iSortAttr<0 )
5353 			{
5354 				sError.SetSprintf ( "sort-by attribute '%s' not found", pQuery->m_sSortBy.cstr() );
5355 				return NULL;
5356 			}
5357 			const CSphColumnInfo & tAttr = tSorterSchema.GetAttr ( iSortAttr );
5358 			tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
5359 			tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
5360 			tStateMatch.m_dAttrs[0] = iSortAttr;
5361 			SetupSortRemap ( tSorterSchema, tStateMatch );
5362 		}
5363 
5364 		// find out what function to use and whether it needs attributes
5365 		bUsesAttrs = true;
5366 		switch ( pQuery->m_eSort )
5367 		{
5368 			case SPH_SORT_ATTR_DESC:		eMatchFunc = FUNC_ATTR_DESC; break;
5369 			case SPH_SORT_ATTR_ASC:			eMatchFunc = FUNC_ATTR_ASC; break;
5370 			case SPH_SORT_TIME_SEGMENTS:	eMatchFunc = FUNC_TIMESEGS; break;
5371 			case SPH_SORT_RELEVANCE:		eMatchFunc = FUNC_REL_DESC; bUsesAttrs = false; break;
5372 			default:
5373 				sError.SetSprintf ( "unknown sorting mode %d", pQuery->m_eSort );
5374 				return NULL;
5375 		}
5376 	}
5377 
5378 	// groups sorting function
5379 	if ( bGotGroupby )
5380 	{
5381 		ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sGroupSortBy.cstr(),
5382 			tSorterSchema, eGroupFunc, tStateGroup, sError );
5383 
5384 		if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
5385 		{
5386 			if ( eRes==SORT_CLAUSE_RANDOM )
5387 				sError.SetSprintf ( "groups can not be sorted by @random" );
5388 			return NULL;
5389 		}
5390 
5391 		ExtraAddSortkeys ( pExtra, tSorterSchema, tStateGroup.m_dAttrs );
5392 
5393 		assert ( dGroupColumns.GetLength() || tSettings.m_bImplicit );
5394 		if ( pExtra && !tSettings.m_bImplicit )
5395 		{
5396 			ARRAY_FOREACH ( i, dGroupColumns )
5397 				pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns[i] ), true );
5398 		}
5399 
5400 		if ( bGotDistinct )
5401 		{
5402 			dGroupColumns.Add ( tSorterSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() ) );
5403 			assert ( dGroupColumns.Last()>=0 );
5404 			if ( pExtra )
5405 				pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns.Last() ), true );
5406 		}
5407 
5408 		if ( dGroupColumns.GetLength() ) // implicit case
5409 		{
5410 			FixupDependency ( tSorterSchema, dGroupColumns.Begin(), dGroupColumns.GetLength() );
5411 		}
5412 		FixupDependency ( tSorterSchema, tStateGroup.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
5413 
5414 		// GroupSortBy str attributes setup
5415 		SetupSortRemap ( tSorterSchema, tStateGroup );
5416 	}
5417 
5418 	// set up aggregate filter for grouper
5419 	if ( bGotGroupby && tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() )
5420 	{
5421 		if ( tSorterSchema.GetAttr ( tQueue.m_pAggrFilter->m_sAttrName.cstr() ) )
5422 		{
5423 			tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, tQueue.m_pAggrFilter->m_sAttrName, tSorterSchema, sError );
5424 		} else
5425 		{
5426 			// having might reference aliased attributes but @* attributes got stored without alias in sorter schema
5427 			CSphString sHaving;
5428 			ARRAY_FOREACH ( i, pQuery->m_dItems )
5429 			{
5430 				const CSphQueryItem & tItem = pQuery->m_dItems[i];
5431 				if ( tItem.m_sAlias==tQueue.m_pAggrFilter->m_sAttrName )
5432 				{
5433 					sHaving = tItem.m_sExpr;
5434 					break;
5435 				}
5436 			}
5437 
5438 			if ( sHaving=="groupby()" )
5439 				sHaving = "@groupby";
5440 			else if ( sHaving=="count(*)" )
5441 				sHaving = "@count";
5442 
5443 			tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, sHaving, tSorterSchema, sError );
5444 		}
5445 
5446 		if ( !tSettings.m_pAggrFilterTrait )
5447 			return NULL;
5448 	}
5449 
5450 	///////////////////
5451 	// spawn the queue
5452 	///////////////////
5453 
5454 	if ( !bGotGroupby )
5455 	{
5456 		if ( tQueue.m_pUpdate )
5457 			pTop = new CSphUpdateQueue ( pQuery->m_iMaxMatches, tQueue.m_pUpdate, pQuery->m_bIgnoreNonexistent, pQuery->m_bStrict );
5458 		else if ( tQueue.m_pDeletes )
5459 			pTop = new CSphDeleteQueue ( pQuery->m_iMaxMatches, tQueue.m_pDeletes );
5460 		else
5461 			pTop = CreatePlainSorter ( eMatchFunc, pQuery->m_bSortKbuffer, pQuery->m_iMaxMatches, bUsesAttrs, uPackedFactorFlags & SPH_FACTOR_ENABLE );
5462 	} else
5463 	{
5464 		pTop = sphCreateSorter1st ( eMatchFunc, eGroupFunc, pQuery, tSettings, uPackedFactorFlags & SPH_FACTOR_ENABLE );
5465 	}
5466 
5467 	if ( !pTop )
5468 	{
5469 		sError.SetSprintf ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)",
5470 			eMatchFunc, bGotGroupby, eGroupFunc );
5471 		return NULL;
5472 	}
5473 
5474 	switch ( pQuery->m_eCollation )
5475 	{
5476 		case SPH_COLLATION_LIBC_CI:
5477 			tStateMatch.m_fnStrCmp = sphCollateLibcCI;
5478 			tStateGroup.m_fnStrCmp = sphCollateLibcCI;
5479 			break;
5480 		case SPH_COLLATION_LIBC_CS:
5481 			tStateMatch.m_fnStrCmp = sphCollateLibcCS;
5482 			tStateGroup.m_fnStrCmp = sphCollateLibcCS;
5483 			break;
5484 		case SPH_COLLATION_UTF8_GENERAL_CI:
5485 			tStateMatch.m_fnStrCmp = sphCollateUtf8GeneralCI;
5486 			tStateGroup.m_fnStrCmp = sphCollateUtf8GeneralCI;
5487 			break;
5488 		case SPH_COLLATION_BINARY:
5489 			tStateMatch.m_fnStrCmp = sphCollateBinary;
5490 			tStateGroup.m_fnStrCmp = sphCollateBinary;
5491 			break;
5492 	}
5493 
5494 	assert ( pTop );
5495 	pTop->SetState ( tStateMatch );
5496 	pTop->SetGroupState ( tStateGroup );
5497 	pTop->SetSchema ( tSorterSchema );
5498 	pTop->m_bRandomize = bRandomize;
5499 
5500 	if ( bRandomize )
5501 	{
5502 		if ( pQuery->m_iRandSeed>=0 )
5503 			sphSrand ( (DWORD)pQuery->m_iRandSeed );
5504 		else
5505 			sphAutoSrand ();
5506 	}
5507 
5508 	tQueue.m_bZonespanlist = bNeedZonespanlist;
5509 	tQueue.m_uPackedFactorFlags = uPackedFactorFlags;
5510 
5511 	return pTop;
5512 }
5513 
5514 
sphFlattenQueue(ISphMatchSorter * pQueue,CSphQueryResult * pResult,int iTag)5515 int sphFlattenQueue ( ISphMatchSorter * pQueue, CSphQueryResult * pResult, int iTag )
5516 {
5517 	if ( !pQueue || !pQueue->GetLength() )
5518 		return 0;
5519 
5520 	int iOffset = pResult->m_dMatches.GetLength ();
5521 	pResult->m_dMatches.Resize ( iOffset + pQueue->GetLength() );
5522 	int iCopied = pQueue->Flatten ( pResult->m_dMatches.Begin() + iOffset, iTag );
5523 	pResult->m_dMatches.Resize ( iOffset + iCopied );
5524 	return iCopied;
5525 }
5526 
5527 
sphHasExpressions(const CSphQuery & tQuery,const CSphSchema & tSchema)5528 bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
5529 {
5530 	ARRAY_FOREACH ( i, tQuery.m_dItems )
5531 	{
5532 		const CSphQueryItem & tItem = tQuery.m_dItems[i];
5533 		const CSphString & sExpr = tItem.m_sExpr;
5534 
5535 		// all expressions that come from parser are automatically aliased
5536 		assert ( !tItem.m_sAlias.IsEmpty() );
5537 
5538 		if ( !( sExpr=="*"
5539 			|| ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && tItem.m_sAlias==sExpr )
5540 			|| IsGroupbyMagic ( sExpr ) ) )
5541 			return true;
5542 	}
5543 
5544 	return false;
5545 }
5546 
5547 
5548 //
5549 // $Id$
5550 //
5551 
5552