1 //
2 // $Id$
3 //
4
5 //
6 // Copyright (c) 2001-2016, Andrew Aksyonoff
7 // Copyright (c) 2008-2016, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15
16 #include "sphinx.h"
17 #include "sphinxint.h"
18 #include "sphinxjson.h"
19
20 #include <time.h>
21 #include <math.h>
22
23 #if !USE_WINDOWS
24 #include <unistd.h>
25 #include <sys/time.h>
26 #endif
27
28 //////////////////////////////////////////////////////////////////////////
29 // TRAITS
30 //////////////////////////////////////////////////////////////////////////
31
SetState(const CSphMatchComparatorState & tState)32 void ISphMatchSorter::SetState ( const CSphMatchComparatorState & tState )
33 {
34 m_tState = tState;
35 m_tState.m_iNow = (DWORD) time ( NULL );
36 }
37
38
39 /// groupby key type
40 typedef int64_t SphGroupKey_t;
41
42
43 /// base grouper (class that computes groupby key)
44 class CSphGrouper
45 {
46 public:
~CSphGrouper()47 virtual ~CSphGrouper () {}
48 virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const = 0;
49 virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const = 0;
50 virtual void GetLocator ( CSphAttrLocator & tOut ) const = 0;
51 virtual ESphAttr GetResultType () const = 0;
SetStringPool(const BYTE *)52 virtual void SetStringPool ( const BYTE * ) {}
CanMulti() const53 virtual bool CanMulti () const { return true; }
54 };
55
56
HasString(const CSphMatchComparatorState * pState)57 static bool HasString ( const CSphMatchComparatorState * pState )
58 {
59 assert ( pState );
60
61 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
62 {
63 if ( pState->m_eKeypart[i]==SPH_KEYPART_STRING || pState->m_eKeypart[i]==SPH_KEYPART_STRINGPTR || ( pState->m_tSubKeys[i].m_sKey.cstr() ) )
64 return true;
65 }
66
67 return false;
68 }
69
70
71 /// match-sorting priority queue traits
72 class CSphMatchQueueTraits : public ISphMatchSorter, ISphNoncopyable
73 {
74 protected:
75 CSphMatch * m_pData;
76 int m_iUsed;
77 int m_iSize;
78 const bool m_bUsesAttrs;
79
80 private:
81 const int m_iDataLength;
82
83 public:
84 /// ctor
CSphMatchQueueTraits(int iSize,bool bUsesAttrs)85 CSphMatchQueueTraits ( int iSize, bool bUsesAttrs )
86 : m_iUsed ( 0 )
87 , m_iSize ( iSize )
88 , m_bUsesAttrs ( bUsesAttrs )
89 , m_iDataLength ( iSize )
90 {
91 assert ( iSize>0 );
92 m_pData = new CSphMatch [ m_iDataLength ];
93 assert ( m_pData );
94
95 m_tState.m_iNow = (DWORD) time ( NULL );
96 m_iMatchCapacity = m_iDataLength;
97 }
98
99 /// dtor
~CSphMatchQueueTraits()100 virtual ~CSphMatchQueueTraits ()
101 {
102 for ( int i=0; i<m_iDataLength; ++i )
103 m_tSchema.FreeStringPtrs ( m_pData+i );
104 SafeDeleteArray ( m_pData );
105 }
106
107 public:
UsesAttrs() const108 bool UsesAttrs () const { return m_bUsesAttrs; }
GetLength() const109 virtual int GetLength () const { return m_iUsed; }
GetDataLength() const110 virtual int GetDataLength () const { return m_iDataLength; }
111
CanMulti() const112 virtual bool CanMulti () const
113 {
114 return !HasString ( &m_tState );
115 }
116 };
117
118 //////////////////////////////////////////////////////////////////////////
119 // SORTING QUEUES
120 //////////////////////////////////////////////////////////////////////////
121
122 template < typename COMP >
123 struct CompareIndex_fn
124 {
125 const CSphMatch * m_pBase;
126 const CSphMatchComparatorState * m_pState;
127
CompareIndex_fnCompareIndex_fn128 CompareIndex_fn ( const CSphMatch * pBase, const CSphMatchComparatorState * pState )
129 : m_pBase ( pBase )
130 , m_pState ( pState )
131 {}
132
IsLessCompareIndex_fn133 bool IsLess ( int a, int b ) const
134 {
135 return COMP::IsLess ( m_pBase[b], m_pBase[a], *m_pState );
136 }
137 };
138
139 /// heap sorter
140 /// plain binary heap based PQ
141 template < typename COMP, bool NOTIFICATIONS >
142 class CSphMatchQueue : public CSphMatchQueueTraits
143 {
144 public:
145 /// ctor
CSphMatchQueue(int iSize,bool bUsesAttrs)146 CSphMatchQueue ( int iSize, bool bUsesAttrs )
147 : CSphMatchQueueTraits ( iSize, bUsesAttrs )
148 {
149 if_const ( NOTIFICATIONS )
150 m_dJustPopped.Reserve(1);
151 }
152
153 /// check if this sorter does groupby
IsGroupby() const154 virtual bool IsGroupby () const
155 {
156 return false;
157 }
158
GetWorst() const159 virtual const CSphMatch * GetWorst() const
160 {
161 return m_pData;
162 }
163
164 /// add entry to the queue
Push(const CSphMatch & tEntry)165 virtual bool Push ( const CSphMatch & tEntry )
166 {
167 m_iTotal++;
168
169 if_const ( NOTIFICATIONS )
170 {
171 m_iJustPushed = 0;
172 m_dJustPopped.Resize(0);
173 }
174
175 if ( m_iUsed==m_iSize )
176 {
177 // if it's worse that current min, reject it, else pop off current min
178 if ( COMP::IsLess ( tEntry, m_pData[0], m_tState ) )
179 return true;
180 else
181 Pop ();
182 }
183
184 // do add
185 m_tSchema.CloneMatch ( m_pData+m_iUsed, tEntry );
186
187 if_const ( NOTIFICATIONS )
188 m_iJustPushed = tEntry.m_uDocID;
189
190 int iEntry = m_iUsed++;
191
192 // sift up if needed, so that worst (lesser) ones float to the top
193 while ( iEntry )
194 {
195 int iParent = ( iEntry-1 ) >> 1;
196 if ( !COMP::IsLess ( m_pData[iEntry], m_pData[iParent], m_tState ) )
197 break;
198
199 // entry is less than parent, should float to the top
200 Swap ( m_pData[iEntry], m_pData[iParent] );
201 iEntry = iParent;
202 }
203
204 return true;
205 }
206
207 /// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)208 virtual bool PushGrouped ( const CSphMatch &, bool )
209 {
210 assert ( 0 );
211 return false;
212 }
213
214 /// remove root (ie. top priority) entry
Pop()215 virtual void Pop ()
216 {
217 assert ( m_iUsed );
218 if ( !(--m_iUsed) ) // empty queue? just return
219 return;
220
221 // make the last entry my new root
222 Swap ( m_pData[0], m_pData[m_iUsed] );
223 m_tSchema.FreeStringPtrs ( &m_pData[m_iUsed] );
224
225 if_const ( NOTIFICATIONS )
226 {
227 if ( m_dJustPopped.GetLength() )
228 m_dJustPopped[0] = m_pData[m_iUsed].m_uDocID;
229 else
230 m_dJustPopped.Add ( m_pData[m_iUsed].m_uDocID );
231 }
232
233 // sift down if needed
234 int iEntry = 0;
235 for ( ;; )
236 {
237 // select child
238 int iChild = (iEntry<<1) + 1;
239 if ( iChild>=m_iUsed )
240 break;
241
242 // select smallest child
243 if ( iChild+1<m_iUsed )
244 if ( COMP::IsLess ( m_pData[iChild+1], m_pData[iChild], m_tState ) )
245 iChild++;
246
247 // if smallest child is less than entry, do float it to the top
248 if ( COMP::IsLess ( m_pData[iChild], m_pData[iEntry], m_tState ) )
249 {
250 Swap ( m_pData[iChild], m_pData[iEntry] );
251 iEntry = iChild;
252 continue;
253 }
254
255 break;
256 }
257 }
258
259 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)260 int Flatten ( CSphMatch * pTo, int iTag )
261 {
262 assert ( m_iUsed>=0 );
263 pTo += m_iUsed;
264 int iCopied = m_iUsed;
265 while ( m_iUsed>0 )
266 {
267 --pTo;
268 m_tSchema.FreeStringPtrs ( pTo );
269 Swap ( *pTo, *m_pData );
270 if ( iTag>=0 )
271 pTo->m_iTag = iTag;
272 Pop ();
273 }
274 m_iTotal = 0;
275 return iCopied;
276 }
277
Finalize(ISphMatchProcessor & tProcessor,bool bCallProcessInResultSetOrder)278 void Finalize ( ISphMatchProcessor & tProcessor, bool bCallProcessInResultSetOrder )
279 {
280 if ( !GetLength() )
281 return;
282
283 if ( !bCallProcessInResultSetOrder )
284 {
285 // just evaluate in heap order
286 CSphMatch * pCur = m_pData;
287 const CSphMatch * pEnd = m_pData + m_iUsed;
288 while ( pCur<pEnd )
289 {
290 tProcessor.Process ( pCur++ );
291 }
292 } else
293 {
294 // means final-stage calls will be evaluated
295 // a) over the final, pre-limit result set
296 // b) in the final result set order
297 CSphFixedVector<int> dIndexes ( GetLength() );
298 ARRAY_FOREACH ( i, dIndexes )
299 dIndexes[i] = i;
300 sphSort ( dIndexes.Begin(), dIndexes.GetLength(), CompareIndex_fn<COMP> ( m_pData, &m_tState ) );
301
302 ARRAY_FOREACH ( i, dIndexes )
303 {
304 tProcessor.Process ( m_pData + dIndexes[i] );
305 }
306 }
307 }
308 };
309
310 //////////////////////////////////////////////////////////////////////////
311
312 /// match sorting functor
313 template < typename COMP >
314 struct MatchSort_fn : public MatchSortAccessor_t
315 {
316 CSphMatchComparatorState m_tState;
317
MatchSort_fnMatchSort_fn318 explicit MatchSort_fn ( const CSphMatchComparatorState & tState )
319 : m_tState ( tState )
320 {}
321
IsLessMatchSort_fn322 bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b )
323 {
324 return COMP::IsLess ( *a, *b, m_tState );
325 }
326 };
327
328
329 /// K-buffer (generalized double buffer) sorter
330 /// faster worst-case but slower average-case than the heap sorter
331 template < typename COMP, bool NOTIFICATIONS >
332 class CSphKbufferMatchQueue : public CSphMatchQueueTraits
333 {
334 protected:
335 static const int COEFF = 4;
336
337 CSphMatch * m_pEnd;
338 CSphMatch * m_pWorst;
339 bool m_bFinalized;
340
341 public:
342 /// ctor
CSphKbufferMatchQueue(int iSize,bool bUsesAttrs)343 CSphKbufferMatchQueue ( int iSize, bool bUsesAttrs )
344 : CSphMatchQueueTraits ( iSize*COEFF, bUsesAttrs )
345 , m_pEnd ( m_pData+iSize*COEFF )
346 , m_pWorst ( NULL )
347 , m_bFinalized ( false )
348 {
349 if_const ( NOTIFICATIONS )
350 m_dJustPopped.Reserve ( m_iSize );
351
352 m_iSize /= COEFF;
353 }
354
355 /// check if this sorter does groupby
IsGroupby() const356 virtual bool IsGroupby () const
357 {
358 return false;
359 }
360
361 /// add entry to the queue
Push(const CSphMatch & tEntry)362 virtual bool Push ( const CSphMatch & tEntry )
363 {
364 if_const ( NOTIFICATIONS )
365 {
366 m_iJustPushed = 0;
367 m_dJustPopped.Resize(0);
368 }
369
370 // quick early rejection checks
371 m_iTotal++;
372 if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
373 return true;
374
375 // quick check passed
376 // fill the data, back to front
377 m_bFinalized = false;
378 m_iUsed++;
379 m_tSchema.CloneMatch ( m_pEnd-m_iUsed, tEntry );
380
381 if_const ( NOTIFICATIONS )
382 m_iJustPushed = tEntry.m_uDocID;
383
384 // do the initial sort once
385 if ( m_iTotal==m_iSize )
386 {
387 assert ( m_iUsed==m_iSize && !m_pWorst );
388 MatchSort_fn<COMP> tComp ( m_tState );
389 sphSort ( m_pEnd-m_iSize, m_iSize, tComp, tComp );
390 m_pWorst = m_pEnd-m_iSize;
391 m_bFinalized = true;
392 return true;
393 }
394
395 // do the sort/cut when the K-buffer is full
396 if ( m_iUsed==m_iSize*COEFF )
397 {
398 MatchSort_fn<COMP> tComp ( m_tState );
399 sphSort ( m_pData, m_iUsed, tComp, tComp );
400
401 if_const ( NOTIFICATIONS )
402 {
403 for ( CSphMatch * pMatch = m_pData; pMatch < m_pEnd-m_iSize; pMatch++ )
404 m_dJustPopped.Add ( pMatch->m_uDocID );
405 }
406
407 m_iUsed = m_iSize;
408 m_pWorst = m_pEnd-m_iSize;
409 m_bFinalized = true;
410 }
411 return true;
412 }
413
414 /// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)415 virtual bool PushGrouped ( const CSphMatch &, bool )
416 {
417 assert ( 0 );
418 return false;
419 }
420
421 /// finalize, perform final sort/cut as needed
Finalize(ISphMatchProcessor & tProcessor,bool)422 virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
423 {
424 if ( !GetLength() )
425 return;
426
427 if ( !m_bFinalized )
428 {
429 MatchSort_fn<COMP> tComp ( m_tState );
430 sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
431 m_iUsed = Min ( m_iUsed, m_iSize );
432 m_bFinalized = true;
433 }
434
435 // reverse order iteration
436 CSphMatch * pCur = m_pEnd - 1;
437 const CSphMatch * pEnd = m_pEnd - m_iUsed;
438 while ( pCur>=pEnd )
439 {
440 tProcessor.Process ( pCur-- );
441 }
442 }
443
444 /// current result set length
GetLength() const445 virtual int GetLength () const
446 {
447 return Min ( m_iUsed, m_iSize );
448 }
449
450 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)451 int Flatten ( CSphMatch * pTo, int iTag )
452 {
453 const CSphMatch * pBegin = pTo;
454
455 // ensure we are sorted
456 if ( m_iUsed )
457 {
458 MatchSort_fn<COMP> tComp ( m_tState );
459 sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
460 }
461
462 // reverse copy
463 for ( int i=1; i<=Min ( m_iUsed, m_iSize ); i++ )
464 {
465 m_tSchema.FreeStringPtrs ( pTo );
466 Swap ( *pTo, m_pEnd[-i] );
467 if ( iTag>=0 )
468 pTo->m_iTag = iTag;
469 pTo++;
470 }
471
472 // clean up for the next work session
473 m_iTotal = 0;
474 m_iUsed = 0;
475 m_iSize = 0;
476 m_bFinalized = false;
477
478 return ( pTo-pBegin );
479 }
480 };
481
482 //////////////////////////////////////////////////////////////////////////
483
484 /// collector for UPDATE statement
485 class CSphUpdateQueue : public CSphMatchQueueTraits
486 {
487 CSphAttrUpdate m_tWorkSet;
488 CSphIndex * m_pIndex;
489 CSphString * m_pError;
490 CSphString * m_pWarning;
491 int * m_pAffected;
492
493 private:
DoUpdate()494 void DoUpdate()
495 {
496 if ( !m_iUsed )
497 return;
498
499 m_tWorkSet.m_dRowOffset.Resize ( m_iUsed );
500 m_tWorkSet.m_dDocids.Resize ( m_iUsed );
501 m_tWorkSet.m_dRows.Resize ( m_iUsed );
502
503 ARRAY_FOREACH ( i, m_tWorkSet.m_dDocids )
504 {
505 m_tWorkSet.m_dRowOffset[i] = 0;
506 m_tWorkSet.m_dDocids[i] = 0;
507 m_tWorkSet.m_dRows[i] = NULL;
508 if ( !DOCINFO2ID ( STATIC2DOCINFO ( m_pData[i].m_pStatic ) ) ) // if static attributes were copied, so, they actually dynamic
509 {
510 m_tWorkSet.m_dDocids[i] = m_pData[i].m_uDocID;
511 } else // static attributes points to the active indexes - so, no lookup, 5 times faster update.
512 {
513 m_tWorkSet.m_dRows[i] = m_pData[i].m_pStatic - ( sizeof(SphDocID_t) / sizeof(CSphRowitem) );
514 }
515 }
516
517 *m_pAffected += m_pIndex->UpdateAttributes ( m_tWorkSet, -1, *m_pError, *m_pWarning );
518 m_iUsed = 0;
519 }
520 public:
521 /// ctor
CSphUpdateQueue(int iSize,CSphAttrUpdateEx * pUpdate,bool bIgnoreNonexistent,bool bStrict)522 CSphUpdateQueue ( int iSize, CSphAttrUpdateEx * pUpdate, bool bIgnoreNonexistent, bool bStrict )
523 : CSphMatchQueueTraits ( iSize, true )
524 {
525 m_tWorkSet.m_dRowOffset.Reserve ( m_iSize );
526 m_tWorkSet.m_dDocids.Reserve ( m_iSize );
527 m_tWorkSet.m_dRows.Reserve ( m_iSize );
528
529 m_tWorkSet.m_bIgnoreNonexistent = bIgnoreNonexistent;
530 m_tWorkSet.m_bStrict = bStrict;
531 m_tWorkSet.m_dTypes = pUpdate->m_pUpdate->m_dTypes;
532 m_tWorkSet.m_dPool = pUpdate->m_pUpdate->m_dPool;
533 m_tWorkSet.m_dAttrs.Resize ( pUpdate->m_pUpdate->m_dAttrs.GetLength() );
534 ARRAY_FOREACH ( i, m_tWorkSet.m_dAttrs )
535 {
536 CSphString sTmp;
537 sTmp = pUpdate->m_pUpdate->m_dAttrs[i];
538 m_tWorkSet.m_dAttrs[i] = sTmp.Leak();
539 }
540
541 m_pIndex = pUpdate->m_pIndex;
542 m_pError = pUpdate->m_pError;
543 m_pWarning = pUpdate->m_pWarning;
544 m_pAffected = &pUpdate->m_iAffected;
545 }
546
547 /// check if this sorter does groupby
IsGroupby() const548 virtual bool IsGroupby () const
549 {
550 return false;
551 }
552
553 /// add entry to the queue
Push(const CSphMatch & tEntry)554 virtual bool Push ( const CSphMatch & tEntry )
555 {
556 m_iTotal++;
557
558 if ( m_iUsed==m_iSize )
559 DoUpdate();
560
561 // do add
562 m_tSchema.CloneMatch ( &m_pData[m_iUsed++], tEntry );
563 return true;
564 }
565
566 /// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)567 virtual bool PushGrouped ( const CSphMatch &, bool )
568 {
569 assert ( 0 );
570 return false;
571 }
572
573 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch *,int)574 int Flatten ( CSphMatch *, int )
575 {
576 assert ( m_iUsed>=0 );
577 DoUpdate();
578 m_iTotal = 0;
579 return 0;
580 }
581
Finalize(ISphMatchProcessor & tProcessor,bool)582 virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
583 {
584 if ( !GetLength() )
585 return;
586
587 // just evaluate in heap order
588 CSphMatch * pCur = m_pData;
589 const CSphMatch * pEnd = m_pData + m_iUsed;
590 while ( pCur<pEnd )
591 {
592 tProcessor.Process ( pCur++ );
593 }
594 }
595 };
596
597 //////////////////////////////////////////////////////////////////////////
598
599 /// collector for DELETE statement
600 class CSphDeleteQueue : public CSphMatchQueueTraits
601 {
602 CSphVector<SphDocID_t>* m_pValues;
603 public:
604 /// ctor
CSphDeleteQueue(int iSize,CSphVector<SphDocID_t> * pDeletes)605 CSphDeleteQueue ( int iSize, CSphVector<SphDocID_t> * pDeletes )
606 : CSphMatchQueueTraits ( 1, false )
607 , m_pValues ( pDeletes )
608 {
609 m_pValues->Reserve ( iSize );
610 }
611
612 /// check if this sorter does groupby
IsGroupby() const613 virtual bool IsGroupby () const
614 {
615 return false;
616 }
617
618 /// add entry to the queue
Push(const CSphMatch & tEntry)619 virtual bool Push ( const CSphMatch & tEntry )
620 {
621 m_iTotal++;
622 m_pValues->Add ( tEntry.m_uDocID );
623 return true;
624 }
625
626 /// add grouped entry (must not happen)
PushGrouped(const CSphMatch &,bool)627 virtual bool PushGrouped ( const CSphMatch &, bool )
628 {
629 assert ( 0 );
630 return false;
631 }
632
633 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch *,int)634 int Flatten ( CSphMatch *, int )
635 {
636 m_iTotal = 0;
637 return 0;
638 }
639
Finalize(ISphMatchProcessor &,bool)640 virtual void Finalize ( ISphMatchProcessor &, bool )
641 {}
642 };
643
644 //////////////////////////////////////////////////////////////////////////
645 // SORTING+GROUPING QUEUE
646 //////////////////////////////////////////////////////////////////////////
647
IsCount(const CSphString & s)648 static bool IsCount ( const CSphString & s )
649 {
650 return s=="@count" || s=="count(*)";
651 }
652
IsGroupby(const CSphString & s)653 static bool IsGroupby ( const CSphString & s )
654 {
655 return s=="@groupby" || s=="@distinct" || s=="groupby()" || s=="@groupbystr";
656 }
657
IsGroupbyMagic(const CSphString & s)658 static bool IsGroupbyMagic ( const CSphString & s )
659 {
660 return IsGroupby ( s ) || IsCount ( s );
661 }
662
663 /// groupers
664 #define GROUPER_BEGIN(_name) \
665 class _name : public CSphGrouper \
666 { \
667 protected: \
668 CSphAttrLocator m_tLocator; \
669 public: \
670 explicit _name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
671 virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; } \
672 virtual ESphAttr GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
673 virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
674 virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const \
675 {
676 // NOLINT
677
678 #define GROUPER_END \
679 } \
680 };
681
682
683 #define GROUPER_BEGIN_SPLIT(_name) \
684 GROUPER_BEGIN(_name) \
685 time_t tStamp = (time_t)uValue; \
686 struct tm tSplit; \
687 localtime_r ( &tStamp, &tSplit );
688
689
690 GROUPER_BEGIN ( CSphGrouperAttr )
691 return uValue;
692 GROUPER_END
693
694
695 GROUPER_BEGIN_SPLIT ( CSphGrouperDay )
696 return (tSplit.tm_year+1900)*10000 + (1+tSplit.tm_mon)*100 + tSplit.tm_mday;
697 GROUPER_END
698
699
700 GROUPER_BEGIN_SPLIT ( CSphGrouperWeek )
701 int iPrevSunday = (1+tSplit.tm_yday) - tSplit.tm_wday; // prev Sunday day of year, base 1
702 int iYear = tSplit.tm_year+1900;
703 if ( iPrevSunday<=0 ) // check if we crossed year boundary
704 {
705 // adjust day and year
706 iPrevSunday += 365;
707 iYear--;
708
709 // adjust for leap years
710 if ( iYear%4==0 && ( iYear%100!=0 || iYear%400==0 ) )
711 iPrevSunday++;
712 }
713 return iYear*1000 + iPrevSunday;
714 GROUPER_END
715
716
717 GROUPER_BEGIN_SPLIT ( CSphGrouperMonth )
718 return (tSplit.tm_year+1900)*100 + (1+tSplit.tm_mon);
719 GROUPER_END
720
721
722 GROUPER_BEGIN_SPLIT ( CSphGrouperYear )
723 return (tSplit.tm_year+1900);
724 GROUPER_END
725
726 template <class PRED>
727 class CSphGrouperString : public CSphGrouperAttr, public PRED
728 {
729 protected:
730 const BYTE * m_pStringBase;
731
732 public:
733
CSphGrouperString(const CSphAttrLocator & tLoc)734 explicit CSphGrouperString ( const CSphAttrLocator & tLoc )
735 : CSphGrouperAttr ( tLoc )
736 , m_pStringBase ( NULL )
737 {
738 }
739
GetResultType() const740 virtual ESphAttr GetResultType () const
741 {
742 return SPH_ATTR_BIGINT;
743 }
744
KeyFromValue(SphAttr_t uValue) const745 virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const
746 {
747 if ( !m_pStringBase || !uValue )
748 return 0;
749
750 const BYTE * pStr = NULL;
751 int iLen = sphUnpackStr ( m_pStringBase+uValue, &pStr );
752
753 if ( !pStr || !iLen )
754 return 0;
755
756 return PRED::Hash ( pStr, iLen );
757 }
758
SetStringPool(const BYTE * pStrings)759 virtual void SetStringPool ( const BYTE * pStrings )
760 {
761 m_pStringBase = pStrings;
762 }
763
CanMulti() const764 virtual bool CanMulti () const { return false; }
765 };
766
767
768 class BinaryHash_fn
769 {
770 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const771 uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
772 {
773 assert ( pStr && iLen );
774 return sphFNV64 ( pStr, iLen, uPrev );
775 }
776 };
777
778
779 template < typename T >
FormatInt(char sBuf[32],T v)780 inline static char * FormatInt ( char sBuf[32], T v )
781 {
782 if_const ( sizeof(T)==4 && v==INT_MIN )
783 return strncpy ( sBuf, "-2147483648", 32 );
784 if_const ( sizeof(T)==8 && v==LLONG_MIN )
785 return strncpy ( sBuf, "-9223372036854775808", 32 );
786
787 bool s = ( v<0 );
788 if ( s )
789 v = -v;
790
791 char * p = sBuf+31;
792 *p = 0;
793 do
794 {
795 *--p = '0' + char ( v % 10 );
796 v /= 10;
797 } while ( v );
798 if ( s )
799 *--p = '-';
800 return p;
801 }
802
803
804 /// lookup JSON key, group by looked up value (used in CSphKBufferJsonGroupSorter)
805 class CSphGrouperJsonField : public CSphGrouper
806 {
807 protected:
808 CSphAttrLocator m_tLocator;
809
810 public:
811 ISphExpr * m_pExpr;
812 const BYTE * m_pStrings;
813
CSphGrouperJsonField(const CSphAttrLocator & tLoc,ISphExpr * pExpr)814 explicit CSphGrouperJsonField ( const CSphAttrLocator & tLoc, ISphExpr * pExpr )
815 : m_tLocator ( tLoc )
816 , m_pExpr ( pExpr )
817 , m_pStrings ( NULL )
818 {}
819
~CSphGrouperJsonField()820 virtual ~CSphGrouperJsonField ()
821 {
822 SafeRelease ( m_pExpr );
823 }
824
SetStringPool(const BYTE * pStrings)825 virtual void SetStringPool ( const BYTE * pStrings )
826 {
827 m_pStrings = pStrings;
828 if ( m_pExpr )
829 m_pExpr->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
830 }
831
GetLocator(CSphAttrLocator & tOut) const832 virtual void GetLocator ( CSphAttrLocator & tOut ) const
833 {
834 tOut = m_tLocator;
835 }
836
GetResultType() const837 virtual ESphAttr GetResultType () const
838 {
839 return SPH_ATTR_BIGINT;
840 }
841
KeyFromMatch(const CSphMatch & tMatch) const842 virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
843 {
844 if ( !m_pExpr )
845 return SphGroupKey_t();
846 return m_pExpr->Int64Eval ( tMatch );
847 }
848
KeyFromValue(SphAttr_t) const849 virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
850 };
851
852
853 template <class PRED>
854 class CSphGrouperMulti : public CSphGrouper, public PRED
855 {
856 public:
CSphGrouperMulti(const CSphVector<CSphAttrLocator> & dLocators,const CSphVector<ESphAttr> & dAttrTypes,const CSphVector<ISphExpr * > & dJsonKeys)857 CSphGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes, const CSphVector<ISphExpr *> & dJsonKeys )
858 : m_dLocators ( dLocators )
859 , m_dAttrTypes ( dAttrTypes )
860 , m_dJsonKeys ( dJsonKeys )
861 {
862 assert ( m_dLocators.GetLength()>1 );
863 assert ( m_dLocators.GetLength()==m_dAttrTypes.GetLength() && m_dLocators.GetLength()==dJsonKeys.GetLength() );
864 }
865
~CSphGrouperMulti()866 virtual ~CSphGrouperMulti ()
867 {
868 ARRAY_FOREACH ( i, m_dJsonKeys )
869 SafeDelete ( m_dJsonKeys[i] );
870 }
871
KeyFromMatch(const CSphMatch & tMatch) const872 virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
873 {
874 SphGroupKey_t tKey = SPH_FNV64_SEED;
875
876 for ( int i=0; i<m_dLocators.GetLength(); i++ )
877 {
878 SphAttr_t tAttr = tMatch.GetAttr ( m_dLocators[i] );
879 if ( m_dAttrTypes[i]==SPH_ATTR_STRING )
880 {
881 assert ( m_pStringBase );
882
883 const BYTE * pStr = NULL;
884 int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
885
886 if ( !pStr || !iLen )
887 continue;
888
889 tKey = PRED::Hash ( pStr, iLen, tKey );
890
891 } else if ( m_dAttrTypes[i]==SPH_ATTR_JSON )
892 {
893 assert ( m_pStringBase );
894
895 const BYTE * pStr = NULL;
896 int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
897
898 if ( !pStr || !iLen )
899 continue;
900
901 uint64_t uValue = m_dJsonKeys[i]->Int64Eval ( tMatch );
902 const BYTE * pValue = m_pStringBase + ( uValue & 0xffffffff );
903 ESphJsonType eRes = (ESphJsonType)( uValue >> 32 );
904
905 int i32Val;
906 int64_t i64Val;
907 double fVal;
908 switch ( eRes )
909 {
910 case JSON_STRING:
911 iLen = sphJsonUnpackInt ( &pValue );
912 tKey = sphFNV64 ( pValue, iLen, tKey );
913 break;
914 case JSON_INT32:
915 i32Val = sphJsonLoadInt ( &pValue );
916 tKey = sphFNV64 ( &i32Val, sizeof(i32Val), tKey );
917 break;
918 case JSON_INT64:
919 i64Val = sphJsonLoadBigint ( &pValue );
920 tKey = sphFNV64 ( &i64Val, sizeof(i64Val), tKey );
921 break;
922 case JSON_DOUBLE:
923 fVal = sphQW2D ( sphJsonLoadBigint ( &pValue ) );
924 tKey = sphFNV64 ( &fVal, sizeof(fVal), tKey );
925 break;
926 default:
927 break;
928 }
929
930 } else
931 tKey = sphFNV64 ( &tAttr, sizeof(SphAttr_t), tKey );
932 }
933
934 return tKey;
935 }
936
SetStringPool(const BYTE * pStrings)937 virtual void SetStringPool ( const BYTE * pStrings )
938 {
939 m_pStringBase = pStrings;
940
941 ARRAY_FOREACH ( i, m_dJsonKeys )
942 {
943 if ( m_dJsonKeys[i] )
944 m_dJsonKeys[i]->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
945 }
946 }
947
KeyFromValue(SphAttr_t) const948 virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
GetLocator(CSphAttrLocator &) const949 virtual void GetLocator ( CSphAttrLocator & ) const { assert(0); }
GetResultType() const950 virtual ESphAttr GetResultType() const { return SPH_ATTR_BIGINT; }
CanMulti() const951 virtual bool CanMulti () const { return false; }
952
953 private:
954 CSphVector<CSphAttrLocator> m_dLocators;
955 CSphVector<ESphAttr> m_dAttrTypes;
956 const BYTE * m_pStringBase;
957 CSphVector<ISphExpr *> m_dJsonKeys;
958 };
959
960 //////////////////////////////////////////////////////////////////////////
961
962 /// simple fixed-size hash
963 /// doesn't keep the order
964 template < typename T, typename KEY, typename HASHFUNC >
965 class CSphFixedHash : ISphNoncopyable
966 {
967 protected:
968 static const int HASH_LIST_END = -1;
969 static const int HASH_DELETED = -2;
970
971 struct HashEntry_t
972 {
973 KEY m_tKey;
974 T m_tValue;
975 int m_iNext;
976 };
977
978 protected:
979 CSphVector<HashEntry_t> m_dEntries; ///< key-value pairs storage pool
980 CSphVector<int> m_dHash; ///< hash into m_dEntries pool
981
982 int m_iFree; ///< free pairs count
983 CSphVector<int> m_dFree; ///< free pair indexes
984
985 public:
986 /// ctor
CSphFixedHash(int iLength)987 explicit CSphFixedHash ( int iLength )
988 {
989 int iBuckets = ( 2 << sphLog2 ( iLength-1 ) ); // less than 50% bucket usage guaranteed
990 assert ( iLength>0 );
991 assert ( iLength<=iBuckets );
992
993 m_dEntries.Resize ( iLength );
994 m_dHash.Resize ( iBuckets );
995 m_dFree.Resize ( iLength );
996
997 Reset ();
998 }
999
1000 /// cleanup
Reset()1001 void Reset ()
1002 {
1003 ARRAY_FOREACH ( i, m_dEntries )
1004 m_dEntries[i].m_iNext = HASH_DELETED;
1005
1006 ARRAY_FOREACH ( i, m_dHash )
1007 m_dHash[i] = HASH_LIST_END;
1008
1009 m_iFree = m_dFree.GetLength();
1010 ARRAY_FOREACH ( i, m_dFree )
1011 m_dFree[i] = i;
1012 }
1013
1014 /// add new entry
1015 /// returns NULL on success
1016 /// returns pointer to value if already hashed, or replace it with new one, if insisted.
Add(const T & tValue,const KEY & tKey,bool bReplace=false)1017 T * Add ( const T & tValue, const KEY & tKey, bool bReplace=false )
1018 {
1019 assert ( m_iFree>0 && "hash overflow" );
1020
1021 // check if it's already hashed
1022 DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1023 int iPrev = -1, iEntry;
1024
1025 for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
1026 if ( m_dEntries[iEntry].m_tKey==tKey )
1027 {
1028 if ( bReplace )
1029 m_dEntries[iEntry].m_tValue = tValue;
1030 return &m_dEntries[iEntry].m_tValue;
1031 }
1032 assert ( iEntry!=HASH_DELETED );
1033
1034 // if it's not, do add
1035 int iNew = m_dFree [ --m_iFree ];
1036
1037 HashEntry_t & tNew = m_dEntries[iNew];
1038 assert ( tNew.m_iNext==HASH_DELETED );
1039
1040 tNew.m_tKey = tKey;
1041 tNew.m_tValue = tValue;
1042 tNew.m_iNext = HASH_LIST_END;
1043
1044 if ( iPrev>=0 )
1045 {
1046 assert ( m_dEntries[iPrev].m_iNext==HASH_LIST_END );
1047 m_dEntries[iPrev].m_iNext = iNew;
1048 } else
1049 {
1050 assert ( m_dHash[uHash]==HASH_LIST_END );
1051 m_dHash[uHash] = iNew;
1052 }
1053 return NULL;
1054 }
1055
1056 /// remove entry from hash
Remove(const KEY & tKey)1057 void Remove ( const KEY & tKey )
1058 {
1059 // check if it's already hashed
1060 DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1061 int iPrev = -1, iEntry;
1062
1063 for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
1064 if ( m_dEntries[iEntry].m_tKey==tKey )
1065 {
1066 // found, remove it
1067 assert ( m_dEntries[iEntry].m_iNext!=HASH_DELETED );
1068 if ( iPrev>=0 )
1069 m_dEntries[iPrev].m_iNext = m_dEntries[iEntry].m_iNext;
1070 else
1071 m_dHash[uHash] = m_dEntries[iEntry].m_iNext;
1072
1073 #ifndef NDEBUG
1074 m_dEntries[iEntry].m_iNext = HASH_DELETED;
1075 #endif
1076
1077 m_dFree [ m_iFree++ ] = iEntry;
1078 return;
1079 }
1080 assert ( iEntry!=HASH_DELETED );
1081 }
1082
1083 /// get value pointer by key
operator ()(const KEY & tKey) const1084 T * operator () ( const KEY & tKey ) const
1085 {
1086 DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
1087 int iEntry;
1088
1089 for ( iEntry=m_dHash[uHash]; iEntry>=0; iEntry=m_dEntries[iEntry].m_iNext )
1090 if ( m_dEntries[iEntry].m_tKey==tKey )
1091 return (T*)&m_dEntries[iEntry].m_tValue;
1092
1093 assert ( iEntry!=HASH_DELETED );
1094 return NULL;
1095 }
1096 };
1097
1098
1099 /////////////////////////////////////////////////////////////////////////////
1100
1101 /// (group,attrvalue) pair
1102 struct SphGroupedValue_t
1103 {
1104 public:
1105 SphGroupKey_t m_uGroup;
1106 SphAttr_t m_uValue;
1107 int m_iCount;
1108
1109 public:
SphGroupedValue_tSphGroupedValue_t1110 SphGroupedValue_t ()
1111 {}
1112
SphGroupedValue_tSphGroupedValue_t1113 SphGroupedValue_t ( SphGroupKey_t uGroup, SphAttr_t uValue, int iCount )
1114 : m_uGroup ( uGroup )
1115 , m_uValue ( uValue )
1116 , m_iCount ( iCount )
1117 {}
1118
operator <SphGroupedValue_t1119 inline bool operator < ( const SphGroupedValue_t & rhs ) const
1120 {
1121 if ( m_uGroup!=rhs.m_uGroup ) return m_uGroup<rhs.m_uGroup;
1122 if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
1123 return m_iCount>rhs.m_iCount;
1124 }
1125
operator ==SphGroupedValue_t1126 inline bool operator == ( const SphGroupedValue_t & rhs ) const
1127 {
1128 return m_uGroup==rhs.m_uGroup && m_uValue==rhs.m_uValue;
1129 }
1130 };
1131
1132 /// same as SphGroupedValue_t but without group
1133 struct SphUngroupedValue_t
1134 {
1135 public:
1136 SphAttr_t m_uValue;
1137 int m_iCount;
1138
1139 public:
SphUngroupedValue_tSphUngroupedValue_t1140 SphUngroupedValue_t ()
1141 {}
1142
SphUngroupedValue_tSphUngroupedValue_t1143 SphUngroupedValue_t ( SphAttr_t uValue, int iCount )
1144 : m_uValue ( uValue )
1145 , m_iCount ( iCount )
1146 {}
1147
operator <SphUngroupedValue_t1148 inline bool operator < ( const SphUngroupedValue_t & rhs ) const
1149 {
1150 if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
1151 return m_iCount>rhs.m_iCount;
1152 }
1153
operator ==SphUngroupedValue_t1154 inline bool operator == ( const SphUngroupedValue_t & rhs ) const
1155 {
1156 return m_uValue==rhs.m_uValue;
1157 }
1158 };
1159
1160
1161 /// unique values counter
1162 /// used for COUNT(DISTINCT xxx) GROUP BY yyy queries
1163 class CSphUniqounter : public CSphVector<SphGroupedValue_t>
1164 {
1165 public:
1166 #ifndef NDEBUG
CSphUniqounter()1167 CSphUniqounter () : m_iCountPos ( 0 ), m_bSorted ( true ) { Reserve ( 16384 ); }
Add(const SphGroupedValue_t & tValue)1168 void Add ( const SphGroupedValue_t & tValue ) { CSphVector<SphGroupedValue_t>::Add ( tValue ); m_bSorted = false; }
Sort()1169 void Sort () { CSphVector<SphGroupedValue_t>::Sort (); m_bSorted = true; }
1170
1171 #else
1172 CSphUniqounter () : m_iCountPos ( 0 ) {}
1173 #endif
1174
1175 public:
1176 int CountStart ( SphGroupKey_t * pOutGroup ); ///< starting counting distinct values, returns count and group key (0 if empty)
1177 int CountNext ( SphGroupKey_t * pOutGroup ); ///< continues counting distinct values, returns count and group key (0 if done)
1178 void Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups );
1179
1180 protected:
1181 int m_iCountPos;
1182
1183 #ifndef NDEBUG
1184 bool m_bSorted;
1185 #endif
1186 };
1187
1188
CountStart(SphGroupKey_t * pOutGroup)1189 int CSphUniqounter::CountStart ( SphGroupKey_t * pOutGroup )
1190 {
1191 m_iCountPos = 0;
1192 return CountNext ( pOutGroup );
1193 }
1194
1195
CountNext(SphGroupKey_t * pOutGroup)1196 int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
1197 {
1198 assert ( m_bSorted );
1199 if ( m_iCountPos>=m_iLength )
1200 return 0;
1201
1202 SphGroupKey_t uGroup = m_pData[m_iCountPos].m_uGroup;
1203 SphAttr_t uValue = m_pData[m_iCountPos].m_uValue;
1204 int iCount = m_pData[m_iCountPos].m_iCount;
1205 *pOutGroup = uGroup;
1206
1207 while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
1208 {
1209 if ( m_pData[m_iCountPos].m_uValue!=uValue )
1210 iCount += m_pData[m_iCountPos].m_iCount;
1211 uValue = m_pData[m_iCountPos].m_uValue;
1212 m_iCountPos++;
1213 }
1214 return iCount;
1215 }
1216
1217
Compact(SphGroupKey_t * pRemoveGroups,int iRemoveGroups)1218 void CSphUniqounter::Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups )
1219 {
1220 assert ( m_bSorted );
1221 if ( !m_iLength )
1222 return;
1223
1224 sphSort ( pRemoveGroups, iRemoveGroups );
1225
1226 SphGroupedValue_t * pSrc = m_pData;
1227 SphGroupedValue_t * pDst = m_pData;
1228 SphGroupedValue_t * pEnd = m_pData + m_iLength;
1229
1230 // skip remove-groups which are not in my list
1231 while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
1232 {
1233 pRemoveGroups++;
1234 iRemoveGroups--;
1235 }
1236
1237 for ( ; pSrc<pEnd; pSrc++ )
1238 {
1239 // check if this entry needs to be removed
1240 while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
1241 {
1242 pRemoveGroups++;
1243 iRemoveGroups--;
1244 }
1245 if ( iRemoveGroups && pSrc->m_uGroup==*pRemoveGroups )
1246 continue;
1247
1248 // check if it's a dupe
1249 if ( pDst>m_pData && pDst[-1]==pSrc[0] )
1250 continue;
1251
1252 *pDst++ = *pSrc;
1253 }
1254
1255 assert ( pDst-m_pData<=m_iLength );
1256 m_iLength = pDst-m_pData;
1257 }
1258
1259 /////////////////////////////////////////////////////////////////////////////
1260
1261 /// attribute magic
1262 enum
1263 {
1264 SPH_VATTR_ID = -1, ///< tells match sorter to use doc id
1265 SPH_VATTR_RELEVANCE = -2, ///< tells match sorter to use match weight
1266 SPH_VATTR_FLOAT = 10000 ///< tells match sorter to compare floats
1267 };
1268
1269
1270 /// match comparator interface from group-by sorter point of view
1271 struct ISphMatchComparator
1272 {
~ISphMatchComparatorISphMatchComparator1273 virtual ~ISphMatchComparator () {}
1274 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & tState ) const = 0;
1275 };
1276
1277
1278 /// additional group-by sorter settings
1279 struct CSphGroupSorterSettings
1280 {
1281 CSphAttrLocator m_tLocGroupby; ///< locator for @groupby
1282 CSphAttrLocator m_tLocCount; ///< locator for @count
1283 CSphAttrLocator m_tLocDistinct; ///< locator for @distinct
1284 CSphAttrLocator m_tDistinctLoc; ///< locator for attribute to compute count(distinct) for
1285 bool m_bDistinct; ///< whether we need distinct
1286 bool m_bMVA; ///< whether we're grouping by MVA attribute
1287 bool m_bMva64;
1288 CSphGrouper * m_pGrouper; ///< group key calculator
1289 bool m_bImplicit; ///< for queries with aggregate functions but without group by clause
1290 const ISphFilter * m_pAggrFilterTrait; ///< aggregate filter that got owned by grouper
1291 bool m_bJson; ///< whether we're grouping by Json attribute
1292 CSphAttrLocator m_tLocGroupbyStr; ///< locator for @groupbystr
1293
CSphGroupSorterSettingsCSphGroupSorterSettings1294 CSphGroupSorterSettings ()
1295 : m_bDistinct ( false )
1296 , m_bMVA ( false )
1297 , m_bMva64 ( false )
1298 , m_pGrouper ( NULL )
1299 , m_bImplicit ( false )
1300 , m_pAggrFilterTrait ( NULL )
1301 , m_bJson ( false )
1302 {}
1303 };
1304
1305
1306 /// aggregate function interface
1307 class IAggrFunc
1308 {
1309 public:
~IAggrFunc()1310 virtual ~IAggrFunc() {}
Ungroup(CSphMatch *)1311 virtual void Ungroup ( CSphMatch * ) {}
1312 virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped ) = 0;
Finalize(CSphMatch *)1313 virtual void Finalize ( CSphMatch * ) {}
1314 };
1315
1316
1317 /// aggregate traits for different attribute types
1318 template < typename T >
1319 class IAggrFuncTraits : public IAggrFunc
1320 {
1321 public:
IAggrFuncTraits(const CSphAttrLocator & tLocator)1322 explicit IAggrFuncTraits ( const CSphAttrLocator & tLocator ) : m_tLocator ( tLocator ) {}
1323 inline T GetValue ( const CSphMatch * pRow );
1324 inline void SetValue ( CSphMatch * pRow, T val );
1325
1326 protected:
1327 CSphAttrLocator m_tLocator;
1328 };
1329
1330 template<>
GetValue(const CSphMatch * pRow)1331 DWORD IAggrFuncTraits<DWORD>::GetValue ( const CSphMatch * pRow )
1332 {
1333 return (DWORD)pRow->GetAttr ( m_tLocator );
1334 }
1335
1336 template<>
SetValue(CSphMatch * pRow,DWORD val)1337 void IAggrFuncTraits<DWORD>::SetValue ( CSphMatch * pRow, DWORD val )
1338 {
1339 pRow->SetAttr ( m_tLocator, val );
1340 }
1341
1342 template<>
GetValue(const CSphMatch * pRow)1343 int64_t IAggrFuncTraits<int64_t>::GetValue ( const CSphMatch * pRow )
1344 {
1345 return pRow->GetAttr ( m_tLocator );
1346 }
1347
1348 template<>
SetValue(CSphMatch * pRow,int64_t val)1349 void IAggrFuncTraits<int64_t>::SetValue ( CSphMatch * pRow, int64_t val )
1350 {
1351 pRow->SetAttr ( m_tLocator, val );
1352 }
1353
1354 template<>
GetValue(const CSphMatch * pRow)1355 float IAggrFuncTraits<float>::GetValue ( const CSphMatch * pRow )
1356 {
1357 return pRow->GetAttrFloat ( m_tLocator );
1358 }
1359
1360 template<>
SetValue(CSphMatch * pRow,float val)1361 void IAggrFuncTraits<float>::SetValue ( CSphMatch * pRow, float val )
1362 {
1363 pRow->SetAttrFloat ( m_tLocator, val );
1364 }
1365
1366
1367
1368 /// SUM() implementation
1369 template < typename T >
1370 class AggrSum_t : public IAggrFuncTraits<T>
1371 {
1372 public:
AggrSum_t(const CSphAttrLocator & tLoc)1373 explicit AggrSum_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1374 {}
1375
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1376 virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1377 {
1378 this->SetValue ( pDst, this->GetValue(pDst)+this->GetValue(pSrc) );
1379 }
1380 };
1381
1382
1383 /// AVG() implementation
1384 template < typename T >
1385 class AggrAvg_t : public IAggrFuncTraits<T>
1386 {
1387 protected:
1388 CSphAttrLocator m_tCountLoc;
1389 public:
AggrAvg_t(const CSphAttrLocator & tLoc,const CSphAttrLocator & tCountLoc)1390 AggrAvg_t ( const CSphAttrLocator & tLoc, const CSphAttrLocator & tCountLoc ) : IAggrFuncTraits<T> ( tLoc ), m_tCountLoc ( tCountLoc )
1391 {}
1392
Ungroup(CSphMatch * pDst)1393 virtual void Ungroup ( CSphMatch * pDst )
1394 {
1395 this->SetValue ( pDst, T ( this->GetValue ( pDst ) * pDst->GetAttr ( m_tCountLoc ) ) );
1396 }
1397
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool bGrouped)1398 virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped )
1399 {
1400 if ( bGrouped )
1401 this->SetValue ( pDst, T ( this->GetValue ( pDst ) + this->GetValue ( pSrc ) * pSrc->GetAttr ( m_tCountLoc ) ) );
1402 else
1403 this->SetValue ( pDst, this->GetValue ( pDst ) + this->GetValue ( pSrc ) );
1404 }
1405
Finalize(CSphMatch * pDst)1406 virtual void Finalize ( CSphMatch * pDst )
1407 {
1408 this->SetValue ( pDst, T ( this->GetValue ( pDst ) / pDst->GetAttr ( m_tCountLoc ) ) );
1409 }
1410 };
1411
1412
1413 /// MAX() implementation
1414 template < typename T >
1415 class AggrMax_t : public IAggrFuncTraits<T>
1416 {
1417 public:
AggrMax_t(const CSphAttrLocator & tLoc)1418 explicit AggrMax_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1419 {}
1420
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1421 virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1422 {
1423 this->SetValue ( pDst, Max ( this->GetValue(pDst), this->GetValue(pSrc) ) );
1424 }
1425 };
1426
1427
1428 /// MIN() implementation
1429 template < typename T >
1430 class AggrMin_t : public IAggrFuncTraits<T>
1431 {
1432 public:
AggrMin_t(const CSphAttrLocator & tLoc)1433 explicit AggrMin_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
1434 {}
1435
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1436 virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1437 {
1438 this->SetValue ( pDst, Min ( this->GetValue(pDst), this->GetValue(pSrc) ) );
1439 }
1440 };
1441
1442
1443 /// GROUP_CONCAT() implementation
1444 class AggrConcat_t : public IAggrFunc
1445 {
1446 protected:
1447 CSphAttrLocator m_tLoc;
1448
1449 public:
AggrConcat_t(const CSphColumnInfo & tCol)1450 explicit AggrConcat_t ( const CSphColumnInfo & tCol )
1451 : m_tLoc ( tCol.m_tLocator )
1452 {}
1453
Ungroup(CSphMatch *)1454 void Ungroup ( CSphMatch * ) {}
Finalize(CSphMatch *)1455 void Finalize ( CSphMatch * ) {}
1456
Update(CSphMatch * pDst,const CSphMatch * pSrc,bool)1457 void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
1458 {
1459 const char * sDst = (const char*) pDst->GetAttr(m_tLoc);
1460 const char * sSrc = (const char*) pSrc->GetAttr(m_tLoc);
1461 assert ( !sDst || *sDst ); // ensure the string is either NULL, or has data
1462 assert ( !sSrc || *sSrc );
1463
1464 // empty source? kinda weird, but done!
1465 if ( !sSrc )
1466 return;
1467
1468 // empty destination? just clone the source
1469 if ( !sDst )
1470 {
1471 if ( sSrc )
1472 pDst->SetAttr ( m_tLoc, (SphAttr_t)strdup(sSrc) );
1473 return;
1474 }
1475
1476 // both source and destination present? append source to destination
1477 // note that it gotta be manual copying here, as SetSprintf (currently) comes with a 1K limit
1478 assert ( sDst && sSrc );
1479 int iSrc = strlen ( sSrc );
1480 int iDst = strlen ( sDst );
1481 char * sNew = new char [ iSrc+iDst+2 ]; // OPTIMIZE? careful pre-reserve and/or realloc would be even faster
1482 memcpy ( sNew, sDst, iDst );
1483 sNew [ iDst ] = ',';
1484 memcpy ( sNew+iDst+1, sSrc, iSrc );
1485 sNew [ iSrc+iDst+1 ] = '\0';
1486 pDst->SetAttr ( m_tLoc, (SphAttr_t)sNew );
1487 SafeDeleteArray ( sDst );
1488 }
1489 };
1490
1491
1492 /// group sorting functor
1493 template < typename COMPGROUP >
1494 struct GroupSorter_fn : public CSphMatchComparatorState, public MatchSortAccessor_t
1495 {
IsLessGroupSorter_fn1496 bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b ) const
1497 {
1498 return COMPGROUP::IsLess ( *b, *a, *this );
1499 }
1500 };
1501
1502
1503 struct MatchCloner_t
1504 {
1505 CSphFixedVector<CSphRowitem> m_dRowBuf;
1506 CSphVector<CSphAttrLocator> m_dAttrsRaw;
1507 CSphVector<CSphAttrLocator> m_dAttrsPtr;
1508 const CSphRsetSchema * m_pSchema;
1509
MatchCloner_tMatchCloner_t1510 MatchCloner_t ()
1511 : m_dRowBuf ( 0 )
1512 , m_pSchema ( NULL )
1513 { }
1514
SetSchemaMatchCloner_t1515 void SetSchema ( const CSphRsetSchema * pSchema )
1516 {
1517 m_pSchema = pSchema;
1518 m_dRowBuf.Reset ( m_pSchema->GetDynamicSize() );
1519 }
1520
CombineMatchCloner_t1521 void Combine ( CSphMatch * pDst, const CSphMatch * pSrc, const CSphMatch * pGroup )
1522 {
1523 assert ( m_pSchema && pDst && pSrc && pGroup );
1524 assert ( pDst!=pGroup );
1525 m_pSchema->CloneMatch ( pDst, *pSrc );
1526
1527 ARRAY_FOREACH ( i, m_dAttrsRaw )
1528 {
1529 pDst->SetAttr ( m_dAttrsRaw[i], pGroup->GetAttr ( m_dAttrsRaw[i] ) );
1530 }
1531
1532 ARRAY_FOREACH ( i, m_dAttrsPtr )
1533 {
1534 assert ( !pDst->GetAttr ( m_dAttrsPtr[i] ) );
1535 const char * sSrc = (const char*)pGroup->GetAttr ( m_dAttrsPtr[i] );
1536 const char * sDst = NULL;
1537 if ( sSrc && *sSrc )
1538 sDst = strdup(sSrc);
1539
1540 pDst->SetAttr ( m_dAttrsPtr[i], (SphAttr_t)sDst );
1541 }
1542 }
1543
CloneMatchCloner_t1544 void Clone ( CSphMatch * pOld, const CSphMatch * pNew )
1545 {
1546 assert ( m_pSchema && pOld && pNew );
1547 if ( pOld->m_pDynamic==NULL ) // no old match has no data to copy, just a fresh but old match
1548 {
1549 m_pSchema->CloneMatch ( pOld, *pNew );
1550 return;
1551 }
1552
1553 memcpy ( m_dRowBuf.Begin(), pOld->m_pDynamic, sizeof(m_dRowBuf[0]) * m_dRowBuf.GetLength() );
1554
1555 // don't let cloning operation to free old string data
1556 // as it will be copied back
1557 ARRAY_FOREACH ( i, m_dAttrsPtr )
1558 pOld->SetAttr ( m_dAttrsPtr[i], 0 );
1559
1560 m_pSchema->CloneMatch ( pOld, *pNew );
1561
1562 ARRAY_FOREACH ( i, m_dAttrsRaw )
1563 pOld->SetAttr ( m_dAttrsRaw[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsRaw[i] ) );
1564 ARRAY_FOREACH ( i, m_dAttrsPtr )
1565 pOld->SetAttr ( m_dAttrsPtr[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsPtr[i] ) );
1566 }
1567 };
1568
1569
ExtractAggregates(const CSphRsetSchema & tSchema,const CSphAttrLocator & tLocCount,const ESphSortKeyPart * m_pGroupSorterKeyparts,const CSphAttrLocator * m_pGroupSorterLocator,CSphVector<IAggrFunc * > & dAggregates,CSphVector<IAggrFunc * > & dAvgs,MatchCloner_t & tCloner)1570 static void ExtractAggregates ( const CSphRsetSchema & tSchema, const CSphAttrLocator & tLocCount, const ESphSortKeyPart * m_pGroupSorterKeyparts, const CSphAttrLocator * m_pGroupSorterLocator,
1571 CSphVector<IAggrFunc *> & dAggregates, CSphVector<IAggrFunc *> & dAvgs, MatchCloner_t & tCloner )
1572 {
1573 for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
1574 {
1575 const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
1576 bool bMagicAggr = IsGroupbyMagic ( tAttr.m_sName ) || sphIsSortStringInternal ( tAttr.m_sName.cstr() ); // magic legacy aggregates
1577
1578 if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE || bMagicAggr )
1579 continue;
1580
1581 switch ( tAttr.m_eAggrFunc )
1582 {
1583 case SPH_AGGR_SUM:
1584 switch ( tAttr.m_eAttrType )
1585 {
1586 case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrSum_t<DWORD> ( tAttr.m_tLocator ) ); break;
1587 case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrSum_t<int64_t> ( tAttr.m_tLocator ) ); break;
1588 case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrSum_t<float> ( tAttr.m_tLocator ) ); break;
1589 default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
1590 }
1591 break;
1592
1593 case SPH_AGGR_AVG:
1594 switch ( tAttr.m_eAttrType )
1595 {
1596 case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrAvg_t<DWORD> ( tAttr.m_tLocator, tLocCount ) ); break;
1597 case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrAvg_t<int64_t> ( tAttr.m_tLocator, tLocCount ) ); break;
1598 case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrAvg_t<float> ( tAttr.m_tLocator, tLocCount ) ); break;
1599 default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
1600 }
1601 // store avg to calculate these attributes prior to groups sort
1602 for ( int iState=0; iState<CSphMatchComparatorState::MAX_ATTRS; iState++ )
1603 {
1604 ESphSortKeyPart eKeypart = m_pGroupSorterKeyparts[iState];
1605 CSphAttrLocator tLoc = m_pGroupSorterLocator[iState];
1606 if ( ( eKeypart==SPH_KEYPART_INT || eKeypart==SPH_KEYPART_FLOAT )
1607 && tLoc.m_bDynamic==tAttr.m_tLocator.m_bDynamic && tLoc.m_iBitOffset==tAttr.m_tLocator.m_iBitOffset
1608 && tLoc.m_iBitCount==tAttr.m_tLocator.m_iBitCount )
1609 {
1610 dAvgs.Add ( dAggregates.Last() );
1611 break;
1612 }
1613 }
1614 break;
1615
1616 case SPH_AGGR_MIN:
1617 switch ( tAttr.m_eAttrType )
1618 {
1619 case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrMin_t<DWORD> ( tAttr.m_tLocator ) ); break;
1620 case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrMin_t<int64_t> ( tAttr.m_tLocator ) ); break;
1621 case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrMin_t<float> ( tAttr.m_tLocator ) ); break;
1622 default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
1623 }
1624 break;
1625
1626 case SPH_AGGR_MAX:
1627 switch ( tAttr.m_eAttrType )
1628 {
1629 case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrMax_t<DWORD> ( tAttr.m_tLocator ) ); break;
1630 case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrMax_t<int64_t> ( tAttr.m_tLocator ) ); break;
1631 case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrMax_t<float> ( tAttr.m_tLocator ) ); break;
1632 default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
1633 }
1634 break;
1635
1636 case SPH_AGGR_CAT:
1637 dAggregates.Add ( new AggrConcat_t ( tAttr ) );
1638 break;
1639
1640 default:
1641 assert ( 0 && "internal error: unhandled aggregate function" );
1642 break;
1643 }
1644
1645 if ( tAttr.m_eAggrFunc==SPH_AGGR_CAT )
1646 tCloner.m_dAttrsPtr.Add ( tAttr.m_tLocator );
1647 else
1648 tCloner.m_dAttrsRaw.Add ( tAttr.m_tLocator );
1649 }
1650 }
1651
1652
1653 /// match sorter with k-buffering and group-by
1654 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
1655 class CSphKBufferGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
1656 {
1657 protected:
1658 ESphGroupBy m_eGroupBy; ///< group-by function
1659 CSphGrouper * m_pGrouper;
1660
1661 CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn > m_hGroup2Match;
1662
1663 protected:
1664 int m_iLimit; ///< max matches to be retrieved
1665
1666 CSphUniqounter m_tUniq;
1667 bool m_bSortByDistinct;
1668
1669 GroupSorter_fn<COMPGROUP> m_tGroupSorter;
1670 const ISphMatchComparator * m_pComp;
1671
1672 CSphVector<IAggrFunc *> m_dAggregates;
1673 CSphVector<IAggrFunc *> m_dAvgs;
1674 const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
1675 MatchCloner_t m_tPregroup;
1676
1677 static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
1678
1679 public:
1680 /// ctor
CSphKBufferGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)1681 CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
1682 : CSphMatchQueueTraits ( pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
1683 , CSphGroupSorterSettings ( tSettings )
1684 , m_eGroupBy ( pQuery->m_eGroupFunc )
1685 , m_pGrouper ( tSettings.m_pGrouper )
1686 , m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR )
1687 , m_iLimit ( pQuery->m_iMaxMatches )
1688 , m_bSortByDistinct ( false )
1689 , m_pComp ( pComp )
1690 , m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
1691 {
1692 assert ( GROUPBY_FACTOR>1 );
1693 assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
1694
1695 if_const ( NOTIFICATIONS )
1696 m_dJustPopped.Reserve ( m_iSize );
1697 }
1698
1699 /// schema setup
SetSchema(CSphRsetSchema & tSchema)1700 virtual void SetSchema ( CSphRsetSchema & tSchema )
1701 {
1702 m_tSchema = tSchema;
1703 m_tPregroup.SetSchema ( &m_tSchema );
1704 m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
1705 m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
1706 if_const ( DISTINCT )
1707 {
1708 m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
1709 }
1710 ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
1711 }
1712
1713 /// dtor
~CSphKBufferGroupSorter()1714 ~CSphKBufferGroupSorter ()
1715 {
1716 SafeDelete ( m_pComp );
1717 SafeDelete ( m_pGrouper );
1718 SafeDelete ( m_pAggrFilter );
1719 ARRAY_FOREACH ( i, m_dAggregates )
1720 SafeDelete ( m_dAggregates[i] );
1721 }
1722
1723 /// check if this sorter does groupby
IsGroupby() const1724 virtual bool IsGroupby () const
1725 {
1726 return true;
1727 }
1728
CanMulti() const1729 virtual bool CanMulti () const
1730 {
1731 if ( m_pGrouper && !m_pGrouper->CanMulti() )
1732 return false;
1733
1734 if ( HasString ( &m_tState ) )
1735 return false;
1736
1737 if ( HasString ( &m_tGroupSorter ) )
1738 return false;
1739
1740 return true;
1741 }
1742
1743 /// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE * pStrings)1744 void SetStringPool ( const BYTE * pStrings )
1745 {
1746 m_pGrouper->SetStringPool ( pStrings );
1747 }
1748
1749 /// add entry to the queue
Push(const CSphMatch & tEntry)1750 virtual bool Push ( const CSphMatch & tEntry )
1751 {
1752 SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
1753 return PushEx ( tEntry, uGroupKey, false, false );
1754 }
1755
1756 /// add grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool)1757 virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
1758 {
1759 return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, false );
1760 }
1761
1762 /// add entry to the queue
PushEx(const CSphMatch & tEntry,const SphGroupKey_t uGroupKey,bool bGrouped,bool,SphAttr_t * pAttr=NULL)1763 virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool, SphAttr_t * pAttr=NULL )
1764 {
1765 if_const ( NOTIFICATIONS )
1766 {
1767 m_iJustPushed = 0;
1768 m_dJustPopped.Resize ( 0 );
1769 }
1770
1771 // if this group is already hashed, we only need to update the corresponding match
1772 CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
1773 if ( ppMatch )
1774 {
1775 CSphMatch * pMatch = (*ppMatch);
1776 assert ( pMatch );
1777 assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
1778 assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
1779
1780 if ( bGrouped )
1781 {
1782 // it's already grouped match
1783 // sum grouped matches count
1784 pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
1785 } else
1786 {
1787 // it's a simple match
1788 // increase grouped matches count
1789 pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
1790 }
1791
1792 // update aggregates
1793 ARRAY_FOREACH ( i, m_dAggregates )
1794 m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
1795
1796 // if new entry is more relevant, update from it
1797 if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) )
1798 {
1799 if_const ( NOTIFICATIONS )
1800 {
1801 m_iJustPushed = tEntry.m_uDocID;
1802 m_dJustPopped.Add ( pMatch->m_uDocID );
1803 }
1804
1805 // clone the low part of the match
1806 m_tPregroup.Clone ( pMatch, &tEntry );
1807
1808 // update @groupbystr value, if available
1809 if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
1810 pMatch->SetAttr ( m_tLocGroupbyStr, *pAttr );
1811 }
1812 }
1813
1814 // submit actual distinct value in all cases
1815 if_const ( DISTINCT )
1816 {
1817 int iCount = 1;
1818 if ( bGrouped )
1819 iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
1820 m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
1821 }
1822
1823 // it's a dupe anyway, so we shouldn't update total matches count
1824 if ( ppMatch )
1825 return false;
1826
1827 // if we're full, let's cut off some worst groups
1828 if ( m_iUsed==m_iSize )
1829 CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
1830
1831 // do add
1832 assert ( m_iUsed<m_iSize );
1833 CSphMatch & tNew = m_pData [ m_iUsed++ ];
1834 m_tSchema.CloneMatch ( &tNew, tEntry );
1835
1836 if_const ( NOTIFICATIONS )
1837 m_iJustPushed = tNew.m_uDocID;
1838
1839 if ( !bGrouped )
1840 {
1841 tNew.SetAttr ( m_tLocGroupby, uGroupKey );
1842 tNew.SetAttr ( m_tLocCount, 1 );
1843 if_const ( DISTINCT )
1844 tNew.SetAttr ( m_tLocDistinct, 0 );
1845
1846 // set @groupbystr value if available
1847 if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
1848 tNew.SetAttr ( m_tLocGroupbyStr, *pAttr );
1849 } else
1850 {
1851 ARRAY_FOREACH ( i, m_dAggregates )
1852 m_dAggregates[i]->Ungroup ( &tNew );
1853 }
1854
1855 m_hGroup2Match.Add ( &tNew, uGroupKey );
1856 m_iTotal++;
1857 return true;
1858 }
1859
CalcAvg(bool bGroup)1860 void CalcAvg ( bool bGroup )
1861 {
1862 if ( !m_dAvgs.GetLength() )
1863 return;
1864
1865 CSphMatch * pMatch = m_pData;
1866 CSphMatch * pEnd = pMatch + m_iUsed;
1867 while ( pMatch<pEnd )
1868 {
1869 ARRAY_FOREACH ( j, m_dAvgs )
1870 {
1871 if ( bGroup )
1872 m_dAvgs[j]->Finalize ( pMatch );
1873 else
1874 m_dAvgs[j]->Ungroup ( pMatch );
1875 }
1876 ++pMatch;
1877 }
1878 }
1879
1880 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)1881 int Flatten ( CSphMatch * pTo, int iTag )
1882 {
1883 CountDistinct ();
1884
1885 CalcAvg ( true );
1886 SortGroups ();
1887
1888 CSphVector<IAggrFunc *> dAggrs;
1889 if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
1890 {
1891 dAggrs = m_dAggregates;
1892 ARRAY_FOREACH ( i, m_dAvgs )
1893 dAggrs.RemoveValue ( m_dAvgs[i] );
1894 }
1895
1896 // FIXME!!! we should provide up-to max_matches to output buffer
1897 const CSphMatch * pBegin = pTo;
1898 int iLen = GetLength ();
1899 for ( int i=0; i<iLen; ++i )
1900 {
1901 CSphMatch & tMatch = m_pData[i];
1902 ARRAY_FOREACH ( j, dAggrs )
1903 dAggrs[j]->Finalize ( &tMatch );
1904
1905 // HAVING filtering
1906 if ( m_pAggrFilter && !m_pAggrFilter->Eval ( tMatch ) )
1907 continue;
1908
1909 m_tSchema.CloneMatch ( pTo, tMatch );
1910 if ( iTag>=0 )
1911 pTo->m_iTag = iTag;
1912
1913 pTo++;
1914 }
1915
1916 m_iUsed = 0;
1917 m_iTotal = 0;
1918
1919 m_hGroup2Match.Reset ();
1920 if_const ( DISTINCT )
1921 m_tUniq.Resize ( 0 );
1922
1923 return ( pTo-pBegin );
1924 }
1925
1926 /// get entries count
GetLength() const1927 int GetLength () const
1928 {
1929 return Min ( m_iUsed, m_iLimit );
1930 }
1931
1932 /// set group comparator state
SetGroupState(const CSphMatchComparatorState & tState)1933 void SetGroupState ( const CSphMatchComparatorState & tState )
1934 {
1935 m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
1936
1937 // FIXME! manual bitwise copying.. yuck
1938 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
1939 {
1940 m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
1941 m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
1942 }
1943 m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
1944 m_tGroupSorter.m_iNow = tState.m_iNow;
1945
1946 // check whether we sort by distinct
1947 if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
1948 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
1949 if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
1950 {
1951 m_bSortByDistinct = true;
1952 break;
1953 }
1954 }
1955
1956 protected:
1957 /// count distinct values if necessary
CountDistinct()1958 void CountDistinct ()
1959 {
1960 if_const ( DISTINCT )
1961 {
1962 m_tUniq.Sort ();
1963 SphGroupKey_t uGroup;
1964 for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
1965 {
1966 CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
1967 if ( ppMatch )
1968 (*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
1969 }
1970 }
1971 }
1972
1973 /// cut worst N groups off the buffer tail
CutWorst(int iBound)1974 void CutWorst ( int iBound )
1975 {
1976 // sort groups
1977 if ( m_bSortByDistinct )
1978 CountDistinct ();
1979
1980 CalcAvg ( true );
1981 SortGroups ();
1982 CalcAvg ( false );
1983
1984 if_const ( NOTIFICATIONS )
1985 {
1986 for ( int i = iBound; i < m_iUsed; ++i )
1987 m_dJustPopped.Add ( m_pData[i].m_uDocID );
1988 }
1989
1990 // cleanup unused distinct stuff
1991 if_const ( DISTINCT )
1992 {
1993 // build kill-list
1994 CSphVector<SphGroupKey_t> dRemove;
1995 dRemove.Resize ( m_iUsed-iBound );
1996 ARRAY_FOREACH ( i, dRemove )
1997 dRemove[i] = m_pData[iBound+i].GetAttr ( m_tLocGroupby );
1998
1999 // sort and compact
2000 if ( !m_bSortByDistinct )
2001 m_tUniq.Sort ();
2002 m_tUniq.Compact ( &dRemove[0], m_iUsed-iBound );
2003 }
2004
2005 // rehash
2006 m_hGroup2Match.Reset ();
2007 for ( int i=0; i<iBound; i++ )
2008 m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tLocGroupby ) );
2009
2010 // cut groups
2011 m_iUsed = iBound;
2012 }
2013
2014 /// sort groups buffer
SortGroups()2015 void SortGroups ()
2016 {
2017 sphSort ( m_pData, m_iUsed, m_tGroupSorter, m_tGroupSorter );
2018 }
2019
Finalize(ISphMatchProcessor & tProcessor,bool)2020 virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
2021 {
2022 if ( !GetLength() )
2023 return;
2024
2025 if ( m_iUsed>m_iLimit )
2026 CutWorst ( m_iLimit );
2027
2028 // just evaluate in heap order
2029 CSphMatch * pCur = m_pData;
2030 const CSphMatch * pEnd = m_pData + m_iUsed;
2031 while ( pCur<pEnd )
2032 tProcessor.Process ( pCur++ );
2033 }
2034 };
2035
2036
2037 /// match sorter with k-buffering and N-best group-by
2038 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2039 class CSphKBufferNGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
2040 {
2041 protected:
2042 ESphGroupBy m_eGroupBy; ///< group-by function
2043 CSphGrouper * m_pGrouper;
2044
2045 CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn > m_hGroup2Match;
2046
2047 protected:
2048 int m_iLimit; ///< max matches to be retrieved
2049 int m_iGLimit; ///< limit per one group
2050 CSphFixedVector<int> m_dGroupByList; ///< chains of equal matches from groups
2051 CSphFixedVector<int> m_dGroupsLen; ///< lengths of chains of equal matches from groups
2052 int m_iFreeHeads; ///< insertion point for head matches.
2053 CSphFreeList m_dFreeTails; ///< where to put tails of the subgroups
2054 SphGroupKey_t m_uLastGroupKey; ///< helps to determine in pushEx whether the new subgroup started
2055 #ifndef NDEBUG
2056 int m_iruns; ///< helpers for conditional breakpoints on debug
2057 int m_ipushed;
2058 #endif
2059 CSphUniqounter m_tUniq;
2060 bool m_bSortByDistinct;
2061
2062 GroupSorter_fn<COMPGROUP> m_tGroupSorter;
2063 const ISphMatchComparator * m_pComp;
2064
2065 CSphVector<IAggrFunc *> m_dAggregates;
2066 CSphVector<IAggrFunc *> m_dAvgs;
2067 const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
2068 MatchCloner_t m_tPregroup;
2069
2070 static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
2071
2072 protected:
GetFreePos(bool bTailPos)2073 inline int GetFreePos ( bool bTailPos )
2074 {
2075 // if we're full, let's cut off some worst groups
2076 if ( m_iUsed==m_iSize )
2077 {
2078 CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
2079 // don't return true value for tail this case,
2080 // since the context might be changed by the CutWorst!
2081 if ( bTailPos )
2082 return -1;
2083 }
2084
2085 // do add
2086 assert ( m_iUsed<m_iSize );
2087 ++m_iUsed;
2088 if ( bTailPos )
2089 {
2090 int iRes = m_dFreeTails.Get() + m_iSize;
2091 assert ( iRes<CSphMatchQueueTraits::GetDataLength() );
2092 return iRes;
2093 } else
2094 {
2095 assert ( m_iFreeHeads<m_iSize );
2096 return m_iFreeHeads++;
2097 }
2098 }
2099
2100 public:
2101 /// ctor
CSphKBufferNGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2102 CSphKBufferNGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
2103 : CSphMatchQueueTraits ( ( pQuery->m_iGroupbyLimit>1 ? 2 : 1 ) * pQuery->m_iMaxMatches * GROUPBY_FACTOR, true )
2104 , CSphGroupSorterSettings ( tSettings )
2105 , m_eGroupBy ( pQuery->m_eGroupFunc )
2106 , m_pGrouper ( tSettings.m_pGrouper )
2107 , m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR*2 )
2108 , m_iLimit ( pQuery->m_iMaxMatches )
2109 , m_iGLimit ( pQuery->m_iGroupbyLimit )
2110 , m_dGroupByList ( 0 )
2111 , m_dGroupsLen ( 0 )
2112 , m_iFreeHeads ( 0 )
2113 , m_uLastGroupKey ( -1 )
2114 , m_bSortByDistinct ( false )
2115 , m_pComp ( pComp )
2116 , m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
2117 {
2118 assert ( GROUPBY_FACTOR>1 );
2119 assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
2120 assert ( m_iGLimit > 1 );
2121
2122 if_const ( NOTIFICATIONS )
2123 m_dJustPopped.Reserve ( m_iSize );
2124
2125 // trick! This case we allocated 2*m_iSize mem.
2126 // range 0..m_iSize used for 1-st matches of each subgroup (i.e., for heads)
2127 // range m_iSize+1..2*m_iSize used for the tails.
2128 m_dGroupByList.Reset ( m_iSize );
2129 m_dGroupsLen.Reset ( m_iSize );
2130 m_iSize >>= 1;
2131 ARRAY_FOREACH ( i, m_dGroupByList )
2132 {
2133 m_dGroupByList[i] = -1;
2134 m_dGroupsLen[i] = 0;
2135 }
2136 m_dFreeTails.Reset ( m_iSize );
2137
2138 #ifndef NDEBUG
2139 m_iruns = 0;
2140 m_ipushed = 0;
2141 #endif
2142 }
2143
2144 // only for n-group
GetDataLength() const2145 virtual int GetDataLength () const
2146 {
2147 return CSphMatchQueueTraits::GetDataLength()/2;
2148 }
2149
2150 /// schema setup
SetSchema(CSphRsetSchema & tSchema)2151 virtual void SetSchema ( CSphRsetSchema & tSchema )
2152 {
2153 m_tSchema = tSchema;
2154
2155 m_tPregroup.SetSchema ( &m_tSchema );
2156 m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
2157 m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
2158 if_const ( DISTINCT )
2159 {
2160 m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
2161 }
2162 ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
2163 }
2164
2165 /// dtor
~CSphKBufferNGroupSorter()2166 virtual ~CSphKBufferNGroupSorter ()
2167 {
2168 SafeDelete ( m_pComp );
2169 SafeDelete ( m_pGrouper );
2170 SafeDelete ( m_pAggrFilter );
2171 ARRAY_FOREACH ( i, m_dAggregates )
2172 SafeDelete ( m_dAggregates[i] );
2173 }
2174
2175 /// check if this sorter does groupby
IsGroupby() const2176 virtual bool IsGroupby () const
2177 {
2178 return true;
2179 }
2180
CanMulti() const2181 virtual bool CanMulti () const
2182 {
2183 if ( m_pGrouper && !m_pGrouper->CanMulti() )
2184 return false;
2185
2186 if ( HasString ( &m_tState ) )
2187 return false;
2188
2189 if ( HasString ( &m_tGroupSorter ) )
2190 return false;
2191
2192 return true;
2193 }
2194
2195 /// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE * pStrings)2196 void SetStringPool ( const BYTE * pStrings )
2197 {
2198 m_pGrouper->SetStringPool ( pStrings );
2199 }
2200
2201 /// add entry to the queue
Push(const CSphMatch & tEntry)2202 virtual bool Push ( const CSphMatch & tEntry )
2203 {
2204 SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
2205 return PushEx ( tEntry, uGroupKey, false, false );
2206 }
2207
2208 /// add grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2209 virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2210 {
2211 return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, bNewSet );
2212 }
2213
FreeMatchChain(int iPos)2214 void FreeMatchChain ( int iPos )
2215 {
2216 while ( iPos>=0 )
2217 {
2218 if_const ( NOTIFICATIONS )
2219 m_dJustPopped.Add ( m_pData[iPos].m_uDocID );
2220
2221 int iLastPos = iPos;
2222 m_tSchema.FreeStringPtrs ( m_pData + iPos );
2223 iPos = m_dGroupByList[iPos];
2224 m_dGroupByList[iLastPos] = -1;
2225 if ( iLastPos>=m_iSize )
2226 m_dFreeTails.Free ( iLastPos - m_iSize );
2227 }
2228 }
2229
2230 // insert a match into subgroup, or discard it
2231 // returns 0 if no place now (need to flush)
2232 // returns 1 if value was discarded or replaced other existing value
2233 // returns 2 if value was added.
2234 enum InsertRes_e
2235 {
2236 INSERT_NONE,
2237 INSERT_REPLACED,
2238 INSERT_ADDED
2239 };
2240
InsertMatch(int iPos,const CSphMatch & tEntry)2241 InsertRes_e InsertMatch ( int iPos, const CSphMatch & tEntry )
2242 {
2243 const int iHead = iPos;
2244 int iPrev = -1;
2245 const bool bDoAdd = ( m_dGroupsLen[iHead]<m_iGLimit );
2246 while ( iPos>=0 )
2247 {
2248 CSphMatch * pMatch = m_pData+iPos;
2249 if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) ) // the tEntry is better than current *pMatch
2250 {
2251 int iNew = iPos;
2252 if ( bDoAdd )
2253 {
2254 iNew = GetFreePos ( true ); // add to the tails (2-nd subrange)
2255 if ( iNew<0 )
2256 return INSERT_NONE;
2257 } else
2258 {
2259 int iPreLast = iPrev;
2260 while ( m_dGroupByList[iNew]>=0 )
2261 {
2262 iPreLast = iNew;
2263 iNew = m_dGroupByList[iNew];
2264 }
2265
2266 m_tSchema.FreeStringPtrs ( m_pData + iNew );
2267 m_dGroupByList[iPreLast] = -1;
2268
2269 if ( iPos==iNew ) // avoid cycle link to itself
2270 iPos = -1;
2271 }
2272
2273 CSphMatch & tNew = m_pData [ iNew ];
2274
2275 if_const ( NOTIFICATIONS )
2276 {
2277 m_iJustPushed = tEntry.m_uDocID;
2278 if ( tNew.m_uDocID!=0 )
2279 m_dJustPopped.Add ( tNew.m_uDocID );
2280 }
2281 if ( bDoAdd )
2282 ++m_dGroupsLen[iHead];
2283
2284 if ( iPos==iHead ) // this is the first elem, need to copy groupby staff from it
2285 {
2286 // trick point! The first elem MUST live in the low half of the pool.
2287 // So, replacing the first is actually moving existing one to the last half,
2288 // then overwriting the one in the low half with the new value and link them
2289 m_tPregroup.Clone ( &tNew, pMatch );
2290 m_tPregroup.Clone ( pMatch, &tEntry );
2291 m_dGroupByList[iNew] = m_dGroupByList[iPos];
2292 m_dGroupByList[iPos] = iNew;
2293 } else // this is elem somewhere in the chain, just shift it.
2294 {
2295 m_tPregroup.Clone ( &tNew, &tEntry );
2296 m_dGroupByList[iPrev] = iNew;
2297 m_dGroupByList[iNew] = iPos;
2298 }
2299 break;
2300 }
2301 iPrev = iPos;
2302 iPos = m_dGroupByList[iPos];
2303 }
2304
2305 if ( iPos<0 && bDoAdd ) // this item is less than everything, but still appropriate
2306 {
2307 int iNew = GetFreePos ( true ); // add to the tails (2-nd subrange)
2308 if ( iNew<0 )
2309 return INSERT_NONE;
2310
2311 CSphMatch & tNew = m_pData [ iNew ];
2312 m_tPregroup.Clone ( &tNew, &tEntry );
2313 m_dGroupByList[iPrev] = iNew;
2314 m_dGroupByList[iNew] = iPos;
2315
2316 if_const ( NOTIFICATIONS )
2317 m_iJustPushed = tEntry.m_uDocID;
2318 if ( bDoAdd )
2319 ++m_dGroupsLen[iHead];
2320 }
2321
2322 return bDoAdd ? INSERT_ADDED : INSERT_REPLACED;
2323 }
2324
2325 #ifndef NDEBUG
CheckIntegrity()2326 void CheckIntegrity()
2327 {
2328 int iTotalLen = 0;
2329 for ( int i=0; i<m_iFreeHeads; ++i )
2330 {
2331 int iLen = 0;
2332 int iCur = i;
2333 while ( iCur>=0 )
2334 {
2335 int iNext = m_dGroupByList[iCur];
2336 assert ( iNext==-1 || m_pData[iCur].GetAttr ( m_tLocGroupby )==0 || m_pData[iNext].GetAttr ( m_tLocGroupby )==0
2337 || m_pData[iCur].GetAttr ( m_tLocGroupby )==m_pData[iNext].GetAttr ( m_tLocGroupby ) );
2338 ++iLen;
2339 iCur = iNext;
2340 }
2341 assert ( m_dGroupsLen[i]==iLen );
2342 iTotalLen += iLen;
2343 }
2344 assert ( iTotalLen==m_iUsed );
2345 }
2346 #define CHECKINTEGRITY() CheckIntegrity()
2347 #else
2348 #define CHECKINTEGRITY()
2349 #endif
2350
2351 /// add entry to the queue
PushEx(const CSphMatch & tEntry,const SphGroupKey_t uGroupKey,bool bGrouped,bool bNewSet)2352 virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool bNewSet )
2353 {
2354 #ifndef NDEBUG
2355 ++m_ipushed;
2356 #endif
2357 CHECKINTEGRITY();
2358 if_const ( NOTIFICATIONS )
2359 {
2360 m_iJustPushed = 0;
2361 m_dJustPopped.Resize ( 0 );
2362 }
2363
2364 // if this group is already hashed, we only need to update the corresponding match
2365 CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
2366 if ( ppMatch )
2367 {
2368 CSphMatch * pMatch = (*ppMatch);
2369 assert ( pMatch );
2370 assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
2371 assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
2372
2373 if ( bGrouped )
2374 {
2375 // it's already grouped match
2376 // sum grouped matches count
2377 if ( bNewSet || uGroupKey!=m_uLastGroupKey )
2378 {
2379 pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
2380 m_uLastGroupKey = uGroupKey;
2381 bNewSet = true;
2382 }
2383 } else
2384 {
2385 // it's a simple match
2386 // increase grouped matches count
2387 pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
2388 }
2389
2390 bNewSet |= !bGrouped;
2391
2392 // update aggregates
2393 if ( bNewSet )
2394 ARRAY_FOREACH ( i, m_dAggregates )
2395 m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
2396
2397 // if new entry is more relevant, update from it
2398 InsertRes_e eRes = InsertMatch ( pMatch-m_pData, tEntry );
2399 if ( eRes==INSERT_NONE )
2400 {
2401 // need to keep all poped values
2402 CSphTightVector<SphDocID_t> dJustPopped;
2403 dJustPopped.SwapData ( m_dJustPopped );
2404
2405 // was no insertion because cache cleaning. Recall myself
2406 PushEx ( tEntry, uGroupKey, bGrouped, bNewSet );
2407
2408 // post Push work
2409 ARRAY_FOREACH ( i, dJustPopped )
2410 m_dJustPopped.Add ( dJustPopped[i] );
2411 CSphMatch ** ppDec = m_hGroup2Match ( uGroupKey );
2412 assert ( ppDec );
2413 (*ppDec)->SetAttr ( m_tLocCount, (*ppDec)->GetAttr ( m_tLocCount )-1 );
2414
2415 } else if ( eRes==INSERT_ADDED )
2416 {
2417 if ( bGrouped )
2418 return true;
2419 ++m_iTotal;
2420 }
2421 }
2422
2423 // submit actual distinct value in all cases
2424 if_const ( DISTINCT )
2425 {
2426 int iCount = 1;
2427 if ( bGrouped )
2428 iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
2429 m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
2430 }
2431 CHECKINTEGRITY();
2432 // it's a dupe anyway, so we shouldn't update total matches count
2433 if ( ppMatch )
2434 return false;
2435
2436 // do add
2437 int iNew = GetFreePos ( false );
2438 assert ( iNew>=0 && iNew<m_iSize );
2439
2440 CSphMatch & tNew = m_pData [ iNew ];
2441 m_tSchema.CloneMatch ( &tNew, tEntry );
2442
2443 m_dGroupByList [ iNew ] = -1;
2444 m_dGroupsLen [ iNew ] = 1;
2445
2446 if_const ( NOTIFICATIONS )
2447 m_iJustPushed = tNew.m_uDocID;
2448
2449 if ( !bGrouped )
2450 {
2451 tNew.SetAttr ( m_tLocGroupby, uGroupKey );
2452 tNew.SetAttr ( m_tLocCount, 1 );
2453 if_const ( DISTINCT )
2454 tNew.SetAttr ( m_tLocDistinct, 0 );
2455 } else
2456 {
2457 m_uLastGroupKey = uGroupKey;
2458 ARRAY_FOREACH ( i, m_dAggregates )
2459 m_dAggregates[i]->Ungroup ( &tNew );
2460 }
2461
2462 m_hGroup2Match.Add ( &tNew, uGroupKey );
2463 m_iTotal++;
2464 CHECKINTEGRITY();
2465 return true;
2466 }
2467
CalcAvg(bool bGroup)2468 void CalcAvg ( bool bGroup )
2469 {
2470 if ( !m_dAvgs.GetLength() )
2471 return;
2472
2473 int iHeadMatch;
2474 int iMatch = iHeadMatch = 0;
2475 for ( int i=0; i<m_iUsed; ++i )
2476 {
2477 CSphMatch * pMatch = m_pData + iMatch;
2478 ARRAY_FOREACH ( j, m_dAvgs )
2479 {
2480 if ( bGroup )
2481 m_dAvgs[j]->Finalize ( pMatch );
2482 else
2483 m_dAvgs[j]->Ungroup ( pMatch );
2484 }
2485 iMatch = m_dGroupByList [ iMatch ];
2486 if ( iMatch<0 )
2487 iMatch = ++iHeadMatch;
2488 }
2489 }
2490
2491 // rebuild m_hGroup2Match to point to the subgroups (2-nd elem and further)
2492 // returns true if any such subgroup found
Hash2nd()2493 void Hash2nd()
2494 {
2495 // let the hash points to the chains from 2-nd elem
2496 m_hGroup2Match.Reset();
2497 int iHeads = m_iUsed;
2498 for ( int i=0; i<iHeads; i++ )
2499 {
2500 if ( m_dGroupsLen[i]>1 )
2501 {
2502 int iCount = m_dGroupsLen[i];
2503 int iTail = m_dGroupByList[i];
2504
2505 assert ( iTail>=0 );
2506 assert ( m_pData[iTail].GetAttr ( m_tLocGroupby )==0 ||
2507 m_pData[i].GetAttr ( m_tLocGroupby )==m_pData[iTail].GetAttr ( m_tLocGroupby ) );
2508
2509 m_hGroup2Match.Add ( m_pData + iTail, m_pData[i].GetAttr ( m_tLocGroupby ) );
2510 iHeads -= iCount - 1;
2511 m_dGroupsLen[iTail] = iCount;
2512 }
2513 }
2514 }
2515
2516
2517 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)2518 int Flatten ( CSphMatch * pTo, int iTag )
2519 {
2520 CountDistinct ();
2521 CalcAvg ( true );
2522
2523 Hash2nd();
2524
2525 SortGroups ();
2526 CSphVector<IAggrFunc *> dAggrs;
2527 if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
2528 {
2529 dAggrs = m_dAggregates;
2530 ARRAY_FOREACH ( i, m_dAvgs )
2531 dAggrs.RemoveValue ( m_dAvgs[i] );
2532 }
2533
2534 const CSphMatch * pBegin = pTo;
2535 int iTotal = GetLength ();
2536 int iTopGroupMatch = 0;
2537 int iEntry = 0;
2538
2539 while ( iEntry<iTotal )
2540 {
2541 CSphMatch * pMatch = m_pData + iTopGroupMatch;
2542 ARRAY_FOREACH ( j, dAggrs )
2543 dAggrs[j]->Finalize ( pMatch );
2544
2545 bool bTopPassed = ( !m_pAggrFilter || m_pAggrFilter->Eval ( *pMatch ) );
2546
2547 // copy top group match
2548 if ( bTopPassed )
2549 {
2550 m_tSchema.CloneMatch ( pTo, *pMatch );
2551 if ( iTag>=0 )
2552 pTo->m_iTag = iTag;
2553 pTo++;
2554 }
2555 iEntry++;
2556 iTopGroupMatch++;
2557
2558 // now look for the next match.
2559 // In this specific case (2-nd, just after the head)
2560 // we have to look it in the hash, not in the linked list!
2561 CSphMatch ** ppMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2562 int iNext = ( ppMatch ? *ppMatch-m_pData : -1 );
2563 while ( iNext>=0 )
2564 {
2565 // copy rest group matches
2566 if ( bTopPassed )
2567 {
2568 m_tPregroup.Combine ( pTo, m_pData+iNext, pMatch );
2569 if ( iTag>=0 )
2570 pTo->m_iTag = iTag;
2571 pTo++;
2572 }
2573 iEntry++;
2574 iNext = m_dGroupByList[iNext];
2575 }
2576 }
2577
2578 m_iFreeHeads = m_iUsed = 0;
2579 m_iTotal = 0;
2580 ARRAY_FOREACH ( i, m_dGroupByList )
2581 {
2582 m_dGroupByList[i] = -1;
2583 m_dGroupsLen[i] = 0;
2584 }
2585
2586 m_hGroup2Match.Reset ();
2587 if_const ( DISTINCT )
2588 m_tUniq.Resize ( 0 );
2589
2590 return ( pTo-pBegin );
2591 }
2592
2593 /// get entries count
GetLength() const2594 int GetLength () const
2595 {
2596 return Min ( m_iUsed, m_iLimit );
2597 }
2598
2599 /// set group comparator state
SetGroupState(const CSphMatchComparatorState & tState)2600 void SetGroupState ( const CSphMatchComparatorState & tState )
2601 {
2602 m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
2603
2604 // FIXME! manual bitwise copying.. yuck
2605 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
2606 {
2607 m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
2608 m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
2609 }
2610 m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
2611 m_tGroupSorter.m_iNow = tState.m_iNow;
2612
2613 // check whether we sort by distinct
2614 if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
2615 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
2616 if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
2617 {
2618 m_bSortByDistinct = true;
2619 break;
2620 }
2621 }
2622
2623 protected:
2624 /// count distinct values if necessary
CountDistinct()2625 void CountDistinct ()
2626 {
2627 if_const ( DISTINCT )
2628 {
2629 m_tUniq.Sort ();
2630 SphGroupKey_t uGroup;
2631 for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
2632 {
2633 CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
2634 if ( ppMatch )
2635 (*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
2636 }
2637 }
2638 }
2639
2640 /// cut worst N groups off the buffer tail
CutWorst(int iBound)2641 void CutWorst ( int iBound )
2642 {
2643 #ifndef NDEBUG
2644 ++m_iruns;
2645 #endif
2646 CHECKINTEGRITY ();
2647
2648 // sort groups
2649 if ( m_bSortByDistinct )
2650 CountDistinct ();
2651
2652 Hash2nd();
2653
2654 CalcAvg ( true );
2655 SortGroups ();
2656 CalcAvg ( false );
2657
2658 int iHead = 0;
2659 for ( int iLimit=0; iLimit<iBound; iHead++ )
2660 {
2661 const CSphMatch * pMatch = m_pData + iHead;
2662 CSphMatch ** ppTailMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2663 int iCount = 1;
2664 int iTail = -1;
2665 if ( ppTailMatch )
2666 {
2667 assert ( (*ppTailMatch)->GetAttr ( m_tLocGroupby )==0 ||
2668 pMatch->GetAttr ( m_tLocGroupby )==(*ppTailMatch)->GetAttr ( m_tLocGroupby ) );
2669 iTail = (*ppTailMatch) - m_pData;
2670 iCount = m_dGroupsLen[iTail];
2671 assert ( iCount>0 );
2672 }
2673
2674 // whole chain fits limit
2675 if ( ( iLimit + iCount )<=iBound )
2676 {
2677 m_dGroupByList[iHead] = iTail;
2678 m_dGroupsLen[iHead] = iCount;
2679 iLimit += iCount;
2680 continue;
2681 }
2682
2683 // only head match fits limit but not tail match(es)
2684 if ( ( iLimit + 1 )==iBound )
2685 {
2686 m_dGroupByList[iHead] = -1;
2687 m_dGroupsLen[iHead] = 1;
2688
2689 FreeMatchChain ( iTail );
2690 iHead++;
2691 break;
2692 }
2693
2694 // part of tail fits limit - it our last chain
2695 // fix-up chain to fits limit
2696 assert ( iBound-iLimit<=iCount );
2697 iCount = iBound - iLimit;
2698 m_dGroupByList[iHead] = iTail;
2699 m_dGroupsLen[iHead] = iCount;
2700
2701 iCount--;
2702 int iPrev = iTail;
2703 while ( iCount>0 )
2704 {
2705 iPrev = iTail;
2706 iTail = m_dGroupByList[iTail];
2707 iCount--;
2708 assert ( !iCount || iTail>=0 );
2709 }
2710 m_dGroupByList[iPrev] = -1;
2711
2712 iHead++;
2713 FreeMatchChain ( iTail );
2714 break;
2715 }
2716
2717 if_const ( DISTINCT )
2718 {
2719 int iCount = m_iUsed - iHead;
2720 CSphFixedVector<SphGroupKey_t> dRemove ( iCount );
2721 for ( int i=0; i<iCount; i++ )
2722 dRemove[i] = m_pData [ i + iHead ].GetAttr ( m_tLocGroupby );
2723
2724 if ( !m_bSortByDistinct )
2725 m_tUniq.Sort ();
2726
2727 m_tUniq.Compact ( &dRemove[0], iCount );
2728 }
2729
2730 // cleanup chains after limit
2731 for ( int i=iHead; i<m_iFreeHeads; ++i )
2732 {
2733 CSphMatch * pMatch = m_pData + i;
2734 CSphMatch ** ppTailMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
2735 if ( ppTailMatch )
2736 {
2737 assert ( ( *ppTailMatch )->GetAttr ( m_tLocGroupby )==0 || pMatch->GetAttr ( m_tLocGroupby )==( *ppTailMatch )->GetAttr ( m_tLocGroupby ) );
2738 int iTail = ( *ppTailMatch ) - m_pData;
2739 assert ( m_dGroupsLen[iTail]>0 );
2740 FreeMatchChain ( iTail );
2741 }
2742
2743 if_const ( NOTIFICATIONS )
2744 m_dJustPopped.Add ( pMatch->m_uDocID );
2745 m_tSchema.FreeStringPtrs ( pMatch );
2746 m_dGroupByList[i] = -1;
2747 m_dGroupsLen[i] = 0;
2748 }
2749
2750 // cleanup chain lengths after hash2nd
2751 for ( int i=m_iSize; i<m_dGroupsLen.GetLength(); i++ )
2752 m_dGroupsLen[i] = 0;
2753
2754 // rehash
2755 m_hGroup2Match.Reset ();
2756 for ( int i=0; i<iHead; i++ )
2757 m_hGroup2Match.Add ( m_pData + i, m_pData[i].GetAttr ( m_tLocGroupby ) );
2758
2759 // cut groups
2760 m_iUsed = iBound;
2761 m_iFreeHeads = iHead;
2762 CHECKINTEGRITY();
2763 }
2764
2765 /// sort groups buffer
SortGroups()2766 void SortGroups ()
2767 {
2768 sphSort ( m_pData, m_iFreeHeads, m_tGroupSorter, m_tGroupSorter );
2769 }
2770
Finalize(ISphMatchProcessor & tProcessor,bool)2771 virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
2772 {
2773 if ( !GetLength() )
2774 return;
2775
2776 if ( m_iUsed>m_iLimit )
2777 CutWorst ( m_iLimit );
2778
2779 for ( int iMatch=0; iMatch<m_iFreeHeads; iMatch++ )
2780 {
2781 // this is head match
2782 CSphMatch * pMatch = m_pData + iMatch;
2783 tProcessor.Process ( pMatch );
2784
2785 int iNext = m_dGroupByList[iMatch];
2786 int iCount = m_dGroupsLen[iMatch] - 1;
2787 while ( iCount>0 )
2788 {
2789 tProcessor.Process ( m_pData + iNext );
2790 iNext = m_dGroupByList[iNext];
2791 iCount--;
2792 }
2793 }
2794 }
2795 };
2796
2797
2798 /// match sorter with k-buffering and group-by for MVAs
2799 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2800 class CSphKBufferMVAGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
2801 {
2802 protected:
2803 const DWORD * m_pMva; ///< pointer to MVA pool for incoming matches
2804 bool m_bArenaProhibit;
2805 CSphAttrLocator m_tMvaLocator;
2806 bool m_bMva64;
2807
2808 public:
2809 /// ctor
CSphKBufferMVAGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2810 CSphKBufferMVAGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
2811 : CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
2812 , m_pMva ( NULL )
2813 , m_bArenaProhibit ( false )
2814 , m_bMva64 ( tSettings.m_bMva64 )
2815 {
2816 this->m_pGrouper->GetLocator ( m_tMvaLocator );
2817 }
2818
2819 /// check if this sorter does groupby
IsGroupby() const2820 virtual bool IsGroupby () const
2821 {
2822 return true;
2823 }
2824
2825 /// set MVA pool for subsequent matches
SetMVAPool(const DWORD * pMva,bool bArenaProhibit)2826 void SetMVAPool ( const DWORD * pMva, bool bArenaProhibit )
2827 {
2828 m_pMva = pMva;
2829 m_bArenaProhibit = bArenaProhibit;
2830 }
2831
2832 /// add entry to the queue
Push(const CSphMatch & tEntry)2833 virtual bool Push ( const CSphMatch & tEntry )
2834 {
2835 assert ( m_pMva );
2836 if ( !m_pMva )
2837 return false;
2838
2839 // get that list
2840 // FIXME! OPTIMIZE! use simpler locator than full bits/count here
2841 // FIXME! hardcoded MVA type, so here's MVA_DOWNSIZE marker for searching
2842 const DWORD * pValues = tEntry.GetAttrMVA ( this->m_tMvaLocator, m_pMva, m_bArenaProhibit ); // (this pointer is for gcc; it doesn't work otherwise)
2843 if ( !pValues )
2844 return false;
2845
2846 DWORD iValues = *pValues++;
2847
2848 bool bRes = false;
2849 if ( m_bMva64 )
2850 {
2851 assert ( ( iValues%2 )==0 );
2852 for ( ;iValues>0; iValues-=2, pValues+=2 )
2853 {
2854 int64_t iMva = MVA_UPSIZE ( pValues );
2855 SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( iMva );
2856 bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
2857 }
2858
2859 } else
2860 {
2861 while ( iValues-- )
2862 {
2863 SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( *pValues++ );
2864 bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
2865 }
2866 }
2867 return bRes;
2868 }
2869
2870 /// add pre-grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2871 virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2872 {
2873 // re-group it based on the group key
2874 // (first 'this' is for icc; second 'this' is for gcc)
2875 return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
2876 }
2877 };
2878
2879
2880 /// match sorter with k-buffering and group-by for JSON arrays
2881 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
2882 class CSphKBufferJsonGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
2883 {
2884 public:
2885 /// ctor
CSphKBufferJsonGroupSorter(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings)2886 CSphKBufferJsonGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
2887 : CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
2888 {}
2889
2890 /// check if this sorter does groupby
IsGroupby() const2891 virtual bool IsGroupby () const
2892 {
2893 return true;
2894 }
2895
2896 /// add entry to the queue
Push(const CSphMatch & tMatch)2897 virtual bool Push ( const CSphMatch & tMatch )
2898 {
2899 bool bRes = false;
2900
2901 int iLen;
2902 char sBuf[32];
2903
2904 SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromMatch ( tMatch );
2905
2906 int64_t iValue = (int64_t)uGroupkey;
2907 const BYTE * pStrings = ((CSphGrouperJsonField*)this->m_pGrouper)->m_pStrings;
2908 const BYTE * pValue = pStrings + ( iValue & 0xffffffff );
2909 ESphJsonType eRes = (ESphJsonType)( iValue >> 32 );
2910
2911 switch ( eRes )
2912 {
2913 case JSON_ROOT:
2914 {
2915 iLen = sphJsonNodeSize ( JSON_ROOT, pValue );
2916 bool bEmpty = iLen==5; // mask and JSON_EOF
2917 uGroupkey = bEmpty ? 0 : sphFNV64 ( pValue, iLen );
2918 return this->PushEx ( tMatch, uGroupkey, false, false, bEmpty ? NULL : &iValue );
2919 }
2920 case JSON_STRING:
2921 case JSON_OBJECT:
2922 case JSON_MIXED_VECTOR:
2923 iLen = sphJsonUnpackInt ( &pValue );
2924 uGroupkey = ( iLen==1 && eRes!=JSON_STRING ) ? 0 : sphFNV64 ( pValue, iLen );
2925 return this->PushEx ( tMatch, uGroupkey, false, false, ( iLen==1 && eRes!=JSON_STRING ) ? 0: &iValue );
2926 case JSON_STRING_VECTOR:
2927 {
2928 sphJsonUnpackInt ( &pValue );
2929 iLen = sphJsonUnpackInt ( &pValue );
2930 for ( int i=0;i<iLen;i++ )
2931 {
2932 DWORD uOff = pValue-pStrings;
2933 int64_t iValue = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_STRING )<<32 ) );
2934 int iStrLen = sphJsonUnpackInt ( &pValue );
2935 uGroupkey = sphFNV64 ( pValue, iStrLen );
2936 bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
2937 pValue += iStrLen;
2938 }
2939 return bRes;
2940 }
2941 case JSON_INT32:
2942 uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphGetDword(pValue) ) );
2943 break;
2944 case JSON_INT64:
2945 uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphJsonLoadBigint ( &pValue ) ) );
2946 break;
2947 case JSON_DOUBLE:
2948 snprintf ( sBuf, sizeof(sBuf), "%f", sphQW2D ( sphJsonLoadBigint ( &pValue ) ) );
2949 uGroupkey = sphFNV64 ( (const BYTE*)sBuf );
2950 break;
2951 case JSON_INT32_VECTOR:
2952 {
2953 iLen = sphJsonUnpackInt ( &pValue );
2954 int * p = (int*)pValue;
2955 DWORD uOff = pValue-pStrings;
2956 for ( int i=0;i<iLen;i++ )
2957 {
2958 int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_INT32 )<<32 ) );
2959 uGroupkey = *p++;
2960 bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
2961 uOff+=4;
2962 }
2963 return bRes;
2964 }
2965 break;
2966 case JSON_INT64_VECTOR:
2967 case JSON_DOUBLE_VECTOR:
2968 {
2969 iLen = sphJsonUnpackInt ( &pValue );
2970 int64_t * p = (int64_t*)pValue;
2971 DWORD uOff = pValue-pStrings;
2972 ESphJsonType eType = eRes==JSON_INT64_VECTOR ? JSON_INT64 : JSON_DOUBLE;
2973 for ( int i=0;i<iLen;i++ )
2974 {
2975 int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)eType )<<32 ) );
2976 uGroupkey = *p++;
2977 bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
2978 uOff+=8;
2979 }
2980 return bRes;
2981 }
2982 break;
2983 default:
2984 uGroupkey = 0;
2985 iValue = 0;
2986 break;
2987 }
2988
2989 bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
2990 return bRes;
2991 }
2992
2993 /// add pre-grouped entry to the queue
PushGrouped(const CSphMatch & tEntry,bool bNewSet)2994 virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
2995 {
2996 // re-group it based on the group key
2997 // (first 'this' is for icc; second 'this' is for gcc)
2998 return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
2999 }
3000 };
3001
3002
3003 /// implicit group-by sorter
3004 template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
3005 class CSphImplicitGroupSorter : public ISphMatchSorter, ISphNoncopyable, protected CSphGroupSorterSettings
3006 {
3007 protected:
3008 CSphMatch m_tData;
3009 bool m_bDataInitialized;
3010
3011 CSphVector<SphUngroupedValue_t> m_dUniq;
3012
3013 CSphVector<IAggrFunc *> m_dAggregates;
3014 const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
3015 MatchCloner_t m_tPregroup;
3016
3017 public:
3018 /// ctor
CSphImplicitGroupSorter(const ISphMatchComparator * DEBUGARG (pComp),const CSphQuery *,const CSphGroupSorterSettings & tSettings)3019 CSphImplicitGroupSorter ( const ISphMatchComparator * DEBUGARG(pComp), const CSphQuery *, const CSphGroupSorterSettings & tSettings )
3020 : CSphGroupSorterSettings ( tSettings )
3021 , m_bDataInitialized ( false )
3022 , m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
3023 {
3024 assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
3025 assert ( !pComp );
3026
3027 if_const ( NOTIFICATIONS )
3028 m_dJustPopped.Reserve(1);
3029
3030 m_dUniq.Reserve ( 16384 );
3031 m_iMatchCapacity = 1;
3032 }
3033
3034 /// dtor
~CSphImplicitGroupSorter()3035 ~CSphImplicitGroupSorter ()
3036 {
3037 SafeDelete ( m_pAggrFilter );
3038 ARRAY_FOREACH ( i, m_dAggregates )
3039 SafeDelete ( m_dAggregates[i] );
3040 }
3041
3042 /// schema setup
SetSchema(CSphRsetSchema & tSchema)3043 virtual void SetSchema ( CSphRsetSchema & tSchema )
3044 {
3045 m_tSchema = tSchema;
3046 m_tPregroup.SetSchema ( &m_tSchema );
3047 m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
3048 m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
3049 if_const ( DISTINCT )
3050 {
3051 m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
3052 }
3053
3054 CSphVector<IAggrFunc *> dTmp;
3055 ESphSortKeyPart dTmpKeypart[CSphMatchComparatorState::MAX_ATTRS];
3056 CSphAttrLocator dTmpLocator[CSphMatchComparatorState::MAX_ATTRS];
3057 ExtractAggregates ( m_tSchema, m_tLocCount, dTmpKeypart, dTmpLocator, m_dAggregates, dTmp, m_tPregroup );
3058 assert ( !dTmp.GetLength() );
3059 }
3060
GetDataLength() const3061 int GetDataLength () const
3062 {
3063 return 1;
3064 }
3065
UsesAttrs() const3066 bool UsesAttrs () const
3067 {
3068 return true;
3069 }
3070
3071 /// check if this sorter does groupby
IsGroupby() const3072 virtual bool IsGroupby () const
3073 {
3074 return true;
3075 }
3076
CanMulti() const3077 virtual bool CanMulti () const
3078 {
3079 return true;
3080 }
3081
3082 /// set string pool pointer (for string+groupby sorters)
SetStringPool(const BYTE *)3083 void SetStringPool ( const BYTE * )
3084 {
3085 }
3086
3087 /// add entry to the queue
Push(const CSphMatch & tEntry)3088 virtual bool Push ( const CSphMatch & tEntry )
3089 {
3090 return PushEx ( tEntry, false );
3091 }
3092
3093 /// add grouped entry to the queue. bNewSet indicates the beginning of resultset returned by an agent.
PushGrouped(const CSphMatch & tEntry,bool)3094 virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
3095 {
3096 return PushEx ( tEntry, true );
3097 }
3098
3099 /// store all entries into specified location in sorted order, and remove them from queue
Flatten(CSphMatch * pTo,int iTag)3100 virtual int Flatten ( CSphMatch * pTo, int iTag )
3101 {
3102 assert ( m_bDataInitialized );
3103
3104 CountDistinct ();
3105
3106 ARRAY_FOREACH ( j, m_dAggregates )
3107 m_dAggregates[j]->Finalize ( &m_tData );
3108
3109 int iCopied = 0;
3110 if ( !m_pAggrFilter || m_pAggrFilter->Eval ( m_tData ) )
3111 {
3112 iCopied = 1;
3113 m_tSchema.CloneMatch ( pTo, m_tData );
3114 m_tSchema.FreeStringPtrs ( &m_tData );
3115 if ( iTag>=0 )
3116 pTo->m_iTag = iTag;
3117 }
3118
3119 m_iTotal = 0;
3120 m_bDataInitialized = false;
3121
3122 if_const ( DISTINCT )
3123 m_dUniq.Resize(0);
3124
3125 return iCopied;
3126 }
3127
3128 /// finalize, perform final sort/cut as needed
Finalize(ISphMatchProcessor & tProcessor,bool)3129 void Finalize ( ISphMatchProcessor & tProcessor, bool )
3130 {
3131 if ( !GetLength() )
3132 return;
3133
3134 tProcessor.Process ( &m_tData );
3135 }
3136
3137 /// get entries count
GetLength() const3138 int GetLength () const
3139 {
3140 return m_bDataInitialized ? 1 : 0;
3141 }
3142
3143 protected:
3144 /// add entry to the queue
PushEx(const CSphMatch & tEntry,bool bGrouped)3145 bool PushEx ( const CSphMatch & tEntry, bool bGrouped )
3146 {
3147 if_const ( NOTIFICATIONS )
3148 {
3149 m_iJustPushed = 0;
3150 m_dJustPopped.Resize(0);
3151 }
3152
3153 if ( m_bDataInitialized )
3154 {
3155 assert ( m_tData.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
3156
3157 if ( bGrouped )
3158 {
3159 // it's already grouped match
3160 // sum grouped matches count
3161 m_tData.SetAttr ( m_tLocCount, m_tData.GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
3162 } else
3163 {
3164 // it's a simple match
3165 // increase grouped matches count
3166 m_tData.SetAttr ( m_tLocCount, 1 + m_tData.GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
3167 }
3168
3169 // update aggregates
3170 ARRAY_FOREACH ( i, m_dAggregates )
3171 m_dAggregates[i]->Update ( &m_tData, &tEntry, bGrouped );
3172
3173 // if new entry is more relevant, update from it
3174 if ( tEntry.m_uDocID<m_tData.m_uDocID )
3175 {
3176 if_const ( NOTIFICATIONS )
3177 {
3178 m_iJustPushed = tEntry.m_uDocID;
3179 m_dJustPopped.Add ( m_tData.m_uDocID );
3180 }
3181
3182 m_tPregroup.Clone ( &m_tData, &tEntry );
3183 }
3184 }
3185
3186 // submit actual distinct value in all cases
3187 if_const ( DISTINCT )
3188 {
3189 int iCount = 1;
3190 if ( bGrouped )
3191 iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
3192 m_dUniq.Add ( SphUngroupedValue_t ( tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
3193 }
3194
3195 // it's a dupe anyway, so we shouldn't update total matches count
3196 if ( m_bDataInitialized )
3197 return false;
3198
3199 // add first
3200 m_tSchema.CloneMatch ( &m_tData, tEntry );
3201
3202 if_const ( NOTIFICATIONS )
3203 m_iJustPushed = m_tData.m_uDocID;
3204
3205 if ( !bGrouped )
3206 {
3207 m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
3208 m_tData.SetAttr ( m_tLocCount, 1 );
3209 if_const ( DISTINCT )
3210 m_tData.SetAttr ( m_tLocDistinct, 0 );
3211 } else
3212 {
3213 ARRAY_FOREACH ( i, m_dAggregates )
3214 m_dAggregates[i]->Ungroup ( &m_tData );
3215 }
3216
3217 m_bDataInitialized = true;
3218 m_iTotal++;
3219 return true;
3220 }
3221
3222 /// count distinct values if necessary
CountDistinct()3223 void CountDistinct ()
3224 {
3225 if_const ( DISTINCT )
3226 {
3227 assert ( m_bDataInitialized );
3228
3229 m_dUniq.Sort ();
3230 int iCount = 0;
3231 ARRAY_FOREACH ( i, m_dUniq )
3232 {
3233 if ( i>0 && m_dUniq[i-1]==m_dUniq[i] )
3234 continue;
3235 iCount += m_dUniq[i].m_iCount;
3236 }
3237
3238 m_tData.SetAttr ( m_tLocDistinct, iCount );
3239 }
3240 }
3241 };
3242
3243 //////////////////////////////////////////////////////////////////////////
3244 // PLAIN SORTING FUNCTORS
3245 //////////////////////////////////////////////////////////////////////////
3246
3247 /// match sorter
3248 struct MatchRelevanceLt_fn : public ISphMatchComparator
3249 {
VirtualIsLessMatchRelevanceLt_fn3250 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3251 {
3252 return IsLess ( a, b, t );
3253 }
3254
IsLessMatchRelevanceLt_fn3255 static bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & )
3256 {
3257 if ( a.m_iWeight!=b.m_iWeight )
3258 return a.m_iWeight < b.m_iWeight;
3259
3260 return a.m_uDocID > b.m_uDocID;
3261 }
3262 };
3263
3264
3265 /// match sorter
3266 struct MatchAttrLt_fn : public ISphMatchComparator
3267 {
VirtualIsLessMatchAttrLt_fn3268 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3269 {
3270 return IsLess ( a, b, t );
3271 }
3272
IsLessMatchAttrLt_fn3273 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3274 {
3275 if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
3276 {
3277 SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3278 SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3279 if ( aa!=bb )
3280 return aa<bb;
3281 } else
3282 {
3283 int iCmp = t.CmpStrings ( a, b, 0 );
3284 if ( iCmp!=0 )
3285 return iCmp<0;
3286 }
3287
3288 if ( a.m_iWeight!=b.m_iWeight )
3289 return a.m_iWeight < b.m_iWeight;
3290
3291 return a.m_uDocID > b.m_uDocID;
3292 }
3293 };
3294
3295
3296 /// match sorter
3297 struct MatchAttrGt_fn : public ISphMatchComparator
3298 {
VirtualIsLessMatchAttrGt_fn3299 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3300 {
3301 return IsLess ( a, b, t );
3302 }
3303
IsLessMatchAttrGt_fn3304 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3305 {
3306 if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
3307 {
3308 SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3309 SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3310 if ( aa!=bb )
3311 return aa>bb;
3312 } else
3313 {
3314 int iCmp = t.CmpStrings ( a, b, 0 );
3315 if ( iCmp!=0 )
3316 return iCmp>0;
3317 }
3318
3319 if ( a.m_iWeight!=b.m_iWeight )
3320 return a.m_iWeight < b.m_iWeight;
3321
3322 return a.m_uDocID > b.m_uDocID;
3323 }
3324 };
3325
3326
3327 /// match sorter
3328 struct MatchTimeSegments_fn : public ISphMatchComparator
3329 {
VirtualIsLessMatchTimeSegments_fn3330 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3331 {
3332 return IsLess ( a, b, t );
3333 }
3334
IsLessMatchTimeSegments_fn3335 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3336 {
3337 SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
3338 SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
3339 int iA = GetSegment ( aa, t.m_iNow );
3340 int iB = GetSegment ( bb, t.m_iNow );
3341 if ( iA!=iB )
3342 return iA > iB;
3343
3344 if ( a.m_iWeight!=b.m_iWeight )
3345 return a.m_iWeight < b.m_iWeight;
3346
3347 if ( aa!=bb )
3348 return aa<bb;
3349
3350 return a.m_uDocID > b.m_uDocID;
3351 }
3352
3353 protected:
GetSegmentMatchTimeSegments_fn3354 static inline int GetSegment ( SphAttr_t iStamp, SphAttr_t iNow )
3355 {
3356 if ( iStamp>=iNow-3600 ) return 0; // last hour
3357 if ( iStamp>=iNow-24*3600 ) return 1; // last day
3358 if ( iStamp>=iNow-7*24*3600 ) return 2; // last week
3359 if ( iStamp>=iNow-30*24*3600 ) return 3; // last month
3360 if ( iStamp>=iNow-90*24*3600 ) return 4; // last 3 months
3361 return 5; // everything else
3362 }
3363 };
3364
3365
3366 /// match sorter
3367 struct MatchExpr_fn : public ISphMatchComparator
3368 {
VirtualIsLessMatchExpr_fn3369 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3370 {
3371 return IsLess ( a, b, t );
3372 }
3373
IsLessMatchExpr_fn3374 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3375 {
3376 float aa = a.GetAttrFloat ( t.m_tLocator[0] ); // FIXME! OPTIMIZE!!! simplified (dword-granular) getter could be used here
3377 float bb = b.GetAttrFloat ( t.m_tLocator[0] );
3378 if ( aa!=bb )
3379 return aa<bb;
3380 return a.m_uDocID>b.m_uDocID;
3381 }
3382 };
3383
3384 /////////////////////////////////////////////////////////////////////////////
3385
3386 #define SPH_TEST_PAIR(_aa,_bb,_idx ) \
3387 if ( (_aa)!=(_bb) ) \
3388 return ( (t.m_uAttrDesc >> (_idx)) & 1 ) ^ ( (_aa) > (_bb) );
3389
3390
3391 #define SPH_TEST_KEYPART(_idx) \
3392 switch ( t.m_eKeypart[_idx] ) \
3393 { \
3394 case SPH_KEYPART_ID: SPH_TEST_PAIR ( a.m_uDocID, b.m_uDocID, _idx ); break; \
3395 case SPH_KEYPART_WEIGHT: SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
3396 case SPH_KEYPART_INT: \
3397 { \
3398 SphAttr_t aa = a.GetAttr ( t.m_tLocator[_idx] ); \
3399 SphAttr_t bb = b.GetAttr ( t.m_tLocator[_idx] ); \
3400 SPH_TEST_PAIR ( aa, bb, _idx ); \
3401 break; \
3402 } \
3403 case SPH_KEYPART_FLOAT: \
3404 { \
3405 float aa = a.GetAttrFloat ( t.m_tLocator[_idx] ); \
3406 float bb = b.GetAttrFloat ( t.m_tLocator[_idx] ); \
3407 SPH_TEST_PAIR ( aa, bb, _idx ) \
3408 break; \
3409 } \
3410 case SPH_KEYPART_STRINGPTR: \
3411 case SPH_KEYPART_STRING: \
3412 { \
3413 int iCmp = t.CmpStrings ( a, b, _idx ); \
3414 if ( iCmp!=0 ) \
3415 return ( ( t.m_uAttrDesc >> (_idx) ) & 1 ) ^ ( iCmp>0 ); \
3416 break; \
3417 } \
3418 }
3419
3420
3421 struct MatchGeneric2_fn : public ISphMatchComparator
3422 {
VirtualIsLessMatchGeneric2_fn3423 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3424 {
3425 return IsLess ( a, b, t );
3426 }
3427
IsLessMatchGeneric2_fn3428 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3429 {
3430 SPH_TEST_KEYPART(0);
3431 SPH_TEST_KEYPART(1);
3432 return a.m_uDocID>b.m_uDocID;
3433 }
3434 };
3435
3436
3437 struct MatchGeneric3_fn : public ISphMatchComparator
3438 {
VirtualIsLessMatchGeneric3_fn3439 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3440 {
3441 return IsLess ( a, b, t );
3442 }
3443
IsLessMatchGeneric3_fn3444 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3445 {
3446 SPH_TEST_KEYPART(0);
3447 SPH_TEST_KEYPART(1);
3448 SPH_TEST_KEYPART(2);
3449 return a.m_uDocID>b.m_uDocID;
3450 }
3451 };
3452
3453
3454 struct MatchGeneric4_fn : public ISphMatchComparator
3455 {
VirtualIsLessMatchGeneric4_fn3456 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3457 {
3458 return IsLess ( a, b, t );
3459 }
3460
IsLessMatchGeneric4_fn3461 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3462 {
3463 SPH_TEST_KEYPART(0);
3464 SPH_TEST_KEYPART(1);
3465 SPH_TEST_KEYPART(2);
3466 SPH_TEST_KEYPART(3);
3467 return a.m_uDocID>b.m_uDocID;
3468 }
3469 };
3470
3471
3472 struct MatchGeneric5_fn : public ISphMatchComparator
3473 {
VirtualIsLessMatchGeneric5_fn3474 virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
3475 {
3476 return IsLess ( a, b, t );
3477 }
3478
IsLessMatchGeneric5_fn3479 static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
3480 {
3481 SPH_TEST_KEYPART(0);
3482 SPH_TEST_KEYPART(1);
3483 SPH_TEST_KEYPART(2);
3484 SPH_TEST_KEYPART(3);
3485 SPH_TEST_KEYPART(4);
3486 return a.m_uDocID>b.m_uDocID;
3487 }
3488 };
3489
3490 //////////////////////////////////////////////////////////////////////////
3491 // SORT CLAUSE PARSER
3492 //////////////////////////////////////////////////////////////////////////
3493
3494 static const int MAX_SORT_FIELDS = 5; // MUST be in sync with CSphMatchComparatorState::m_iAttr
3495
3496
3497 class SortClauseTokenizer_t
3498 {
3499 protected:
3500 const char * m_pCur;
3501 const char * m_pMax;
3502 char * m_pBuf;
3503
3504 protected:
ToLower(char c)3505 char ToLower ( char c )
3506 {
3507 // 0..9, A..Z->a..z, _, a..z, @, .
3508 if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
3509 return c;
3510 if ( c>='A' && c<='Z' )
3511 return c-'A'+'a';
3512 return 0;
3513 }
3514
3515 public:
SortClauseTokenizer_t(const char * sBuffer)3516 explicit SortClauseTokenizer_t ( const char * sBuffer )
3517 {
3518 int iLen = strlen(sBuffer);
3519 m_pBuf = new char [ iLen+1 ];
3520 m_pMax = m_pBuf+iLen;
3521 m_pCur = m_pBuf;
3522
3523 // make string lowercase but keep case of JSON.field
3524 bool bJson = false;
3525 for ( int i=0; i<=iLen; i++ )
3526 {
3527 char cSrc = sBuffer[i];
3528 char cDst = ToLower ( cSrc );
3529 bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
3530 m_pBuf[i] = bJson ? cSrc : cDst;
3531 }
3532 }
3533
~SortClauseTokenizer_t()3534 ~SortClauseTokenizer_t ()
3535 {
3536 SafeDeleteArray ( m_pBuf );
3537 }
3538
GetToken()3539 const char * GetToken ()
3540 {
3541 // skip spaces
3542 while ( m_pCur<m_pMax && !*m_pCur )
3543 m_pCur++;
3544 if ( m_pCur>=m_pMax )
3545 return NULL;
3546
3547 // memorize token start, and move pointer forward
3548 const char * sRes = m_pCur;
3549 while ( *m_pCur )
3550 m_pCur++;
3551 return sRes;
3552 }
3553
IsSparseCount(const char * sTok)3554 bool IsSparseCount ( const char * sTok )
3555 {
3556 const char * sSeq = "(*)";
3557 for ( ; sTok<m_pMax && *sSeq; sTok++ )
3558 {
3559 bool bGotSeq = ( *sSeq==*sTok );
3560 if ( bGotSeq )
3561 sSeq++;
3562
3563 // stop checking on any non space char outside sequence or sequence end
3564 if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
3565 break;
3566 }
3567
3568 if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
3569 {
3570 // advance token iterator after composite count(*) token
3571 m_pCur = sTok+1;
3572 return true;
3573 } else
3574 {
3575 return false;
3576 }
3577 }
3578 };
3579
3580
Attr2Keypart(ESphAttr eType)3581 static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
3582 {
3583 switch ( eType )
3584 {
3585 case SPH_ATTR_FLOAT: return SPH_KEYPART_FLOAT;
3586 case SPH_ATTR_STRING: return SPH_KEYPART_STRING;
3587 case SPH_ATTR_JSON:
3588 case SPH_ATTR_JSON_FIELD:
3589 case SPH_ATTR_STRINGPTR: return SPH_KEYPART_STRINGPTR;
3590 default: return SPH_KEYPART_INT;
3591 }
3592 }
3593
3594
3595 static const char g_sIntAttrPrefix[] = "@int_str2ptr_";
3596
3597
sphParseSortClause(const CSphQuery * pQuery,const char * sClause,const ISphSchema & tSchema,ESphSortFunc & eFunc,CSphMatchComparatorState & tState,CSphString & sError)3598 ESortClauseParseResult sphParseSortClause ( const CSphQuery * pQuery, const char * sClause, const ISphSchema & tSchema,
3599 ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphString & sError )
3600 {
3601 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
3602 tState.m_dAttrs[i] = -1;
3603
3604 // mini parser
3605 SortClauseTokenizer_t tTok ( sClause );
3606
3607 bool bField = false; // whether i'm expecting field name or sort order
3608 int iField = 0;
3609
3610 for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
3611 {
3612 bField = !bField;
3613
3614 // special case, sort by random
3615 if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
3616 return SORT_CLAUSE_RANDOM;
3617
3618 // handle sort order
3619 if ( !bField )
3620 {
3621 // check
3622 if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
3623 {
3624 sError.SetSprintf ( "invalid sorting order '%s'", pTok );
3625 return SORT_CLAUSE_ERROR;
3626 }
3627
3628 // set
3629 if ( !strcmp ( pTok, "desc" ) )
3630 tState.m_uAttrDesc |= ( 1<<iField );
3631
3632 iField++;
3633 continue;
3634 }
3635
3636 // handle attribute name
3637 if ( iField==MAX_SORT_FIELDS )
3638 {
3639 sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", MAX_SORT_FIELDS );
3640 return SORT_CLAUSE_ERROR;
3641 }
3642
3643 if ( !strcasecmp ( pTok, "@relevance" )
3644 || !strcasecmp ( pTok, "@rank" )
3645 || !strcasecmp ( pTok, "@weight" )
3646 || !strcasecmp ( pTok, "weight()" ) )
3647 {
3648 tState.m_eKeypart[iField] = SPH_KEYPART_WEIGHT;
3649
3650 } else if ( !strcasecmp ( pTok, "@id" ) || !strcasecmp ( pTok, "id" ) )
3651 {
3652 tState.m_eKeypart[iField] = SPH_KEYPART_ID;
3653
3654 } else
3655 {
3656 ESphAttr eAttrType = SPH_ATTR_NONE;
3657
3658 if ( !strcasecmp ( pTok, "@group" ) )
3659 pTok = "@groupby";
3660 else if ( !strcasecmp ( pTok, "count(*)" ) )
3661 pTok = "@count";
3662 else if ( !strcasecmp ( pTok, "facet()" ) )
3663 pTok = "@groupby"; // facet() is essentially a @groupby alias
3664 else if ( strcasecmp ( pTok, "count" )>=0 && tTok.IsSparseCount ( pTok + sizeof ( "count" ) - 1 ) ) // epression count(*) with various spaces
3665 pTok = "@count";
3666
3667
3668 // try to lookup plain attr in sorter schema
3669 int iAttr = tSchema.GetAttrIndex ( pTok );
3670
3671 // do not order by mva (the result is undefined)
3672 if ( iAttr>=0 && ( tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_UINT32SET
3673 || tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_INT64SET ) )
3674 {
3675 sError.SetSprintf ( "order by MVA is undefined" );
3676 return SORT_CLAUSE_ERROR;
3677 }
3678
3679 // try to lookup aliased count(*) and aliased groupby() in select items
3680 if ( iAttr<0 )
3681 {
3682 ARRAY_FOREACH ( i, pQuery->m_dItems )
3683 {
3684 const CSphQueryItem & tItem = pQuery->m_dItems[i];
3685 if ( !tItem.m_sAlias.cstr() || strcasecmp ( tItem.m_sAlias.cstr(), pTok ) )
3686 continue;
3687 if ( tItem.m_sExpr.Begins("@") )
3688 iAttr = tSchema.GetAttrIndex ( tItem.m_sExpr.cstr() );
3689 else if ( tItem.m_sExpr=="count(*)" )
3690 iAttr = tSchema.GetAttrIndex ( "@count" );
3691 else if ( tItem.m_sExpr=="groupby()" )
3692 {
3693 iAttr = tSchema.GetAttrIndex ( "@groupbystr" );
3694 // try numeric group by
3695 if ( iAttr<0 )
3696 iAttr = tSchema.GetAttrIndex ( "@groupby" );
3697 }
3698 break; // break in any case; because we did match the alias
3699 }
3700 }
3701
3702 // try JSON attribute and use JSON attribute instead of JSON field
3703 if ( iAttr<0 || ( iAttr>=0 && ( tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON_FIELD
3704 || tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON ) ) )
3705 {
3706 if ( iAttr>=0 )
3707 {
3708 // aliased SPH_ATTR_JSON_FIELD, reuse existing expression
3709 const CSphColumnInfo * pAttr = &tSchema.GetAttr(iAttr);
3710 if ( pAttr->m_pExpr.Ptr() )
3711 pAttr->m_pExpr->AddRef(); // SetupSortRemap uses refcounted pointer, but does not AddRef() itself, so help it
3712 tState.m_tSubExpr[iField] = pAttr->m_pExpr.Ptr();
3713 tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
3714
3715 } else
3716 {
3717 CSphString sJsonCol, sJsonKey;
3718 if ( sphJsonNameSplit ( pTok, &sJsonCol, &sJsonKey ) )
3719 {
3720 iAttr = tSchema.GetAttrIndex ( sJsonCol.cstr() );
3721 if ( iAttr>=0 )
3722 {
3723 tState.m_tSubExpr[iField] = sphExprParse ( pTok, tSchema, NULL, NULL, sError, NULL );
3724 tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
3725 }
3726 }
3727 }
3728 }
3729
3730 // try json conversion functions (integer()/double()/bigint() in the order by clause)
3731 if ( iAttr<0 )
3732 {
3733 ISphExpr * pExpr = sphExprParse ( pTok, tSchema, &eAttrType, NULL, sError, NULL );
3734 if ( pExpr )
3735 {
3736 tState.m_tSubExpr[iField] = pExpr;
3737 tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen(pTok) );
3738 tState.m_tSubKeys[iField].m_uMask = 0;
3739 tState.m_tSubType[iField] = eAttrType;
3740 iAttr = 0; // will be remapped in SetupSortRemap
3741 }
3742 }
3743
3744 // try precalculated json fields received from agents (prefixed with @int_*)
3745 if ( iAttr<0 )
3746 {
3747 CSphString sName;
3748 sName.SetSprintf ( "%s%s", g_sIntAttrPrefix, pTok );
3749 iAttr = tSchema.GetAttrIndex ( sName.cstr() );
3750 }
3751
3752 // epic fail
3753 if ( iAttr<0 )
3754 {
3755 sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
3756 return SORT_CLAUSE_ERROR;
3757 }
3758
3759 const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
3760 tState.m_eKeypart[iField] = Attr2Keypart ( eAttrType!=SPH_ATTR_NONE ? eAttrType : tCol.m_eAttrType );
3761 tState.m_tLocator[iField] = tCol.m_tLocator;
3762 tState.m_dAttrs[iField] = iAttr;
3763 }
3764 }
3765
3766 if ( iField==0 )
3767 {
3768 sError.SetSprintf ( "no sort order defined" );
3769 return SORT_CLAUSE_ERROR;
3770 }
3771
3772 if ( iField==1 )
3773 tState.m_eKeypart[iField++] = SPH_KEYPART_ID; // add "id ASC"
3774
3775 switch ( iField )
3776 {
3777 case 2: eFunc = FUNC_GENERIC2; break;
3778 case 3: eFunc = FUNC_GENERIC3; break;
3779 case 4: eFunc = FUNC_GENERIC4; break;
3780 case 5: eFunc = FUNC_GENERIC5; break;
3781 default: sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
3782 }
3783 return SORT_CLAUSE_OK;
3784 }
3785
3786 //////////////////////////////////////////////////////////////////////////
3787 // SORTING+GROUPING INSTANTIATION
3788 //////////////////////////////////////////////////////////////////////////
3789
3790 template < typename COMPGROUP >
sphCreateSorter3rd(const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3791 static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery,
3792 const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3793 {
3794 BYTE uSelector = (bHasPackedFactors?1:0)
3795 +(tSettings.m_bDistinct?2:0)
3796 +(tSettings.m_bMVA?4:0)
3797 +(tSettings.m_bImplicit?8:0)
3798 +((pQuery->m_iGroupbyLimit>1)?16:0)
3799 +(tSettings.m_bJson?32:0);
3800 switch ( uSelector )
3801 {
3802 case 0:
3803 return new CSphKBufferGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3804 case 1:
3805 return new CSphKBufferGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3806 case 2:
3807 return new CSphKBufferGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3808 case 3:
3809 return new CSphKBufferGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3810 case 4:
3811 return new CSphKBufferMVAGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3812 case 5:
3813 return new CSphKBufferMVAGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3814 case 6:
3815 return new CSphKBufferMVAGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
3816 case 7:
3817 return new CSphKBufferMVAGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
3818 case 8:
3819 return new CSphImplicitGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3820 case 9:
3821 return new CSphImplicitGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3822 case 10:
3823 return new CSphImplicitGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3824 case 11:
3825 return new CSphImplicitGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3826 case 16:
3827 return new CSphKBufferNGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3828 case 17:
3829 return new CSphKBufferNGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3830 case 18:
3831 return new CSphKBufferNGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
3832 case 19:
3833 return new CSphKBufferNGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
3834 case 32:
3835 return new CSphKBufferJsonGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
3836 case 33:
3837 return new CSphKBufferJsonGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
3838 case 34:
3839 return new CSphKBufferJsonGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
3840 case 35:
3841 return new CSphKBufferJsonGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
3842 }
3843 assert(0);
3844 return NULL;
3845 }
3846
3847
sphCreateSorter2nd(ESphSortFunc eGroupFunc,const ISphMatchComparator * pComp,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3848 static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp,
3849 const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3850 {
3851 switch ( eGroupFunc )
3852 {
3853 case FUNC_GENERIC2: return sphCreateSorter3rd<MatchGeneric2_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3854 case FUNC_GENERIC3: return sphCreateSorter3rd<MatchGeneric3_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3855 case FUNC_GENERIC4: return sphCreateSorter3rd<MatchGeneric4_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3856 case FUNC_GENERIC5: return sphCreateSorter3rd<MatchGeneric5_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3857 case FUNC_EXPR: return sphCreateSorter3rd<MatchExpr_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
3858 default: return NULL;
3859 }
3860 }
3861
3862
sphCreateSorter1st(ESphSortFunc eMatchFunc,ESphSortFunc eGroupFunc,const CSphQuery * pQuery,const CSphGroupSorterSettings & tSettings,bool bHasPackedFactors)3863 static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc,
3864 const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
3865 {
3866 if ( tSettings.m_bImplicit )
3867 return sphCreateSorter2nd ( eGroupFunc, NULL, pQuery, tSettings, bHasPackedFactors );
3868
3869 ISphMatchComparator * pComp = NULL;
3870 switch ( eMatchFunc )
3871 {
3872 case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
3873 case FUNC_ATTR_DESC: pComp = new MatchAttrLt_fn(); break;
3874 case FUNC_ATTR_ASC: pComp = new MatchAttrGt_fn(); break;
3875 case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
3876 case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
3877 case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
3878 case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
3879 case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
3880 case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
3881 }
3882
3883 assert ( pComp );
3884 return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors );
3885 }
3886
3887 //////////////////////////////////////////////////////////////////////////
3888 // GEODIST
3889 //////////////////////////////////////////////////////////////////////////
3890
3891 struct ExprGeodist_t : public ISphExpr
3892 {
3893 public:
ExprGeodist_tExprGeodist_t3894 ExprGeodist_t () {}
3895 bool Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError );
3896 virtual float Eval ( const CSphMatch & tMatch ) const;
3897 virtual void Command ( ESphExprCommand eCmd, void * pArg );
3898
3899 protected:
3900 CSphAttrLocator m_tGeoLatLoc;
3901 CSphAttrLocator m_tGeoLongLoc;
3902 float m_fGeoAnchorLat;
3903 float m_fGeoAnchorLong;
3904 int m_iLat;
3905 int m_iLon;
3906 };
3907
3908
Setup(const CSphQuery * pQuery,const ISphSchema & tSchema,CSphString & sError)3909 bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError )
3910 {
3911 if ( !pQuery->m_bGeoAnchor )
3912 {
3913 sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
3914 return false;
3915 }
3916
3917 int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
3918 if ( iLat<0 )
3919 {
3920 sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
3921 return false;
3922 }
3923
3924 int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
3925 if ( iLong<0 )
3926 {
3927 sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
3928 return false;
3929 }
3930
3931 m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
3932 m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
3933 m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
3934 m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
3935 m_iLat = iLat;
3936 m_iLon = iLong;
3937 return true;
3938 }
3939
3940
sphSqr(double v)3941 static inline double sphSqr ( double v )
3942 {
3943 return v*v;
3944 }
3945
3946
Eval(const CSphMatch & tMatch) const3947 float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
3948 {
3949 const double R = 6384000;
3950 float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
3951 float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
3952 double dlat = plat - m_fGeoAnchorLat;
3953 double dlon = plon - m_fGeoAnchorLong;
3954 double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
3955 double c = 2*asin ( Min ( 1.0, sqrt(a) ) );
3956 return (float)(R*c);
3957 }
3958
Command(ESphExprCommand eCmd,void * pArg)3959 void ExprGeodist_t::Command ( ESphExprCommand eCmd, void * pArg )
3960 {
3961 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
3962 {
3963 static_cast < CSphVector<int>* >(pArg)->Add ( m_iLat );
3964 static_cast < CSphVector<int>* >(pArg)->Add ( m_iLon );
3965 }
3966 }
3967
3968 //////////////////////////////////////////////////////////////////////////
3969 // PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
3970 //////////////////////////////////////////////////////////////////////////
3971
3972 static CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation );
3973 static CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
3974 const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation );
3975
SetupGroupbySettings(const CSphQuery * pQuery,const ISphSchema & tSchema,CSphGroupSorterSettings & tSettings,CSphVector<int> & dGroupColumns,CSphString & sError,bool bImplicit)3976 static bool SetupGroupbySettings ( const CSphQuery * pQuery, const ISphSchema & tSchema,
3977 CSphGroupSorterSettings & tSettings, CSphVector<int> & dGroupColumns, CSphString & sError, bool bImplicit )
3978 {
3979 tSettings.m_tDistinctLoc.m_iBitOffset = -1;
3980
3981 if ( pQuery->m_sGroupBy.IsEmpty() && !bImplicit )
3982 return true;
3983
3984 if ( pQuery->m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
3985 {
3986 sError.SetSprintf ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );
3987 return false;
3988 }
3989
3990 CSphString sJsonColumn;
3991 CSphString sJsonKey;
3992 if ( pQuery->m_eGroupFunc==SPH_GROUPBY_MULTIPLE )
3993 {
3994 CSphVector<CSphAttrLocator> dLocators;
3995 CSphVector<ESphAttr> dAttrTypes;
3996 CSphVector<ISphExpr *> dJsonKeys;
3997
3998 CSphVector<CSphString> dGroupBy;
3999 const char * a = pQuery->m_sGroupBy.cstr();
4000 const char * b = a;
4001 while ( *a )
4002 {
4003 while ( *b && *b!=',' )
4004 b++;
4005 CSphString & sNew = dGroupBy.Add();
4006 sNew.SetBinary ( a, b-a );
4007 if ( *b==',' )
4008 b += 2;
4009 a = b;
4010 }
4011 dGroupBy.Uniq();
4012
4013 ARRAY_FOREACH ( i, dGroupBy )
4014 {
4015 CSphString sJsonExpr;
4016 dJsonKeys.Add ( NULL );
4017 if ( sphJsonNameSplit ( dGroupBy[i].cstr(), &sJsonColumn, &sJsonKey ) )
4018 {
4019 sJsonExpr = dGroupBy[i];
4020 dGroupBy[i] = sJsonColumn;
4021 }
4022
4023 const int iAttr = tSchema.GetAttrIndex ( dGroupBy[i].cstr() );
4024 if ( iAttr<0 )
4025 {
4026 sError.SetSprintf ( "group-by attribute '%s' not found", dGroupBy[i].cstr() );
4027 return false;
4028 }
4029
4030 ESphAttr eType = tSchema.GetAttr ( iAttr ).m_eAttrType;
4031 if ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET )
4032 {
4033 sError.SetSprintf ( "MVA values can't be used in multiple group-by" );
4034 return false;
4035 } else if ( sJsonExpr.IsEmpty() && eType==SPH_ATTR_JSON )
4036 {
4037 sError.SetSprintf ( "JSON blob can't be used in multiple group-by" );
4038 return false;
4039 }
4040
4041 dLocators.Add ( tSchema.GetAttr ( iAttr ).m_tLocator );
4042 dAttrTypes.Add ( eType );
4043 dGroupColumns.Add ( iAttr );
4044
4045 if ( !sJsonExpr.IsEmpty() )
4046 dJsonKeys.Last() = sphExprParse ( sJsonExpr.cstr(), tSchema, NULL, NULL, sError, NULL );
4047 }
4048
4049 tSettings.m_pGrouper = sphCreateGrouperMulti ( dLocators, dAttrTypes, dJsonKeys, pQuery->m_eCollation );
4050
4051 } else if ( sphJsonNameSplit ( pQuery->m_sGroupBy.cstr(), &sJsonColumn, &sJsonKey ) )
4052 {
4053 const int iAttr = tSchema.GetAttrIndex ( sJsonColumn.cstr() );
4054 if ( iAttr<0 )
4055 {
4056 sError.SetSprintf ( "groupby: no such attribute '%s'", sJsonColumn.cstr() );
4057 return false;
4058 }
4059
4060 if ( tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON )
4061 {
4062 sError.SetSprintf ( "groupby: attribute '%s' does not have subfields (must be sql_attr_json)", sJsonColumn.cstr() );
4063 return false;
4064 }
4065
4066 if ( pQuery->m_eGroupFunc!=SPH_GROUPBY_ATTR )
4067 {
4068 sError.SetSprintf ( "groupby: legacy groupby modes are not supported on JSON attributes" );
4069 return false;
4070 }
4071
4072 dGroupColumns.Add ( iAttr );
4073
4074 ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL, pQuery->m_eCollation );
4075 tSettings.m_pGrouper = new CSphGrouperJsonField ( tSchema.GetAttr(iAttr).m_tLocator, pExpr );
4076 tSettings.m_bJson = true;
4077
4078 } else if ( bImplicit )
4079 {
4080 tSettings.m_bImplicit = true;
4081
4082 } else
4083 {
4084 // setup groupby attr
4085 int iGroupBy = tSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
4086
4087 if ( iGroupBy<0 )
4088 {
4089 // try aliased groupby attr (facets)
4090 ARRAY_FOREACH ( i, pQuery->m_dItems )
4091 if ( pQuery->m_sGroupBy==pQuery->m_dItems[i].m_sExpr )
4092 {
4093 iGroupBy = tSchema.GetAttrIndex ( pQuery->m_dItems[i].m_sAlias.cstr() );
4094 break;
4095
4096 } else if ( pQuery->m_sGroupBy==pQuery->m_dItems[i].m_sAlias )
4097 {
4098 iGroupBy = tSchema.GetAttrIndex ( pQuery->m_dItems[i].m_sExpr.cstr() );
4099 break;
4100 }
4101 }
4102
4103 if ( iGroupBy<0 )
4104 {
4105 sError.SetSprintf ( "group-by attribute '%s' not found", pQuery->m_sGroupBy.cstr() );
4106 return false;
4107 }
4108
4109 ESphAttr eType = tSchema.GetAttr ( iGroupBy ).m_eAttrType;
4110 CSphAttrLocator tLoc = tSchema.GetAttr ( iGroupBy ).m_tLocator;
4111 switch ( pQuery->m_eGroupFunc )
4112 {
4113 case SPH_GROUPBY_DAY: tSettings.m_pGrouper = new CSphGrouperDay ( tLoc ); break;
4114 case SPH_GROUPBY_WEEK: tSettings.m_pGrouper = new CSphGrouperWeek ( tLoc ); break;
4115 case SPH_GROUPBY_MONTH: tSettings.m_pGrouper = new CSphGrouperMonth ( tLoc ); break;
4116 case SPH_GROUPBY_YEAR: tSettings.m_pGrouper = new CSphGrouperYear ( tLoc ); break;
4117 case SPH_GROUPBY_ATTR:
4118 if ( eType==SPH_ATTR_JSON || eType==SPH_ATTR_JSON_FIELD )
4119 {
4120 ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL, pQuery->m_eCollation );
4121 tSettings.m_pGrouper = new CSphGrouperJsonField ( tLoc, pExpr );
4122 tSettings.m_bJson = true;
4123
4124 } else if ( eType==SPH_ATTR_STRING )
4125 tSettings.m_pGrouper = sphCreateGrouperString ( tLoc, pQuery->m_eCollation );
4126 else
4127 tSettings.m_pGrouper = new CSphGrouperAttr ( tLoc );
4128 break;
4129 default:
4130 sError.SetSprintf ( "invalid group-by mode (mode=%d)", pQuery->m_eGroupFunc );
4131 return false;
4132 }
4133
4134 tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET );
4135 tSettings.m_bMva64 = ( eType==SPH_ATTR_INT64SET );
4136 dGroupColumns.Add ( iGroupBy );
4137 }
4138
4139 // setup distinct attr
4140 if ( !pQuery->m_sGroupDistinct.IsEmpty() )
4141 {
4142 int iDistinct = tSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() );
4143 if ( iDistinct<0 )
4144 {
4145 sError.SetSprintf ( "group-count-distinct attribute '%s' not found", pQuery->m_sGroupDistinct.cstr() );
4146 return false;
4147 }
4148
4149 tSettings.m_tDistinctLoc = tSchema.GetAttr ( iDistinct ).m_tLocator;
4150 }
4151
4152 return true;
4153 }
4154
4155 // move expressions used in ORDER BY or WITHIN GROUP ORDER BY to presort phase
FixupDependency(ISphSchema & tSchema,const int * pAttrs,int iAttrCount)4156 static bool FixupDependency ( ISphSchema & tSchema, const int * pAttrs, int iAttrCount )
4157 {
4158 assert ( pAttrs );
4159
4160 CSphVector<int> dCur;
4161
4162 // add valid attributes to processing list
4163 for ( int i=0; i<iAttrCount; i++ )
4164 if ( pAttrs[i]>=0 )
4165 dCur.Add ( pAttrs[i] );
4166
4167 int iInitialAttrs = dCur.GetLength();
4168
4169 // collect columns which affect current expressions
4170 for ( int i=0; i<dCur.GetLength(); i++ )
4171 {
4172 const CSphColumnInfo & tCol = tSchema.GetAttr ( dCur[i] );
4173 if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr.Ptr()!=NULL )
4174 tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
4175 }
4176
4177 // get rid of dupes
4178 dCur.Uniq();
4179
4180 // fix up of attributes stages
4181 ARRAY_FOREACH ( i, dCur )
4182 {
4183 int iAttr = dCur[i];
4184 if ( iAttr<0 )
4185 continue;
4186
4187 CSphColumnInfo & tCol = const_cast < CSphColumnInfo & > ( tSchema.GetAttr ( iAttr ) );
4188 if ( tCol.m_eStage==SPH_EVAL_FINAL )
4189 tCol.m_eStage = SPH_EVAL_PRESORT;
4190 }
4191
4192 // it uses attributes if it has dependencies from other attributes
4193 return ( iInitialAttrs>dCur.GetLength() );
4194 }
4195
4196
4197 // expression that transform string pool base + offset -> ptr
4198 struct ExprSortStringAttrFixup_c : public ISphExpr
4199 {
4200 const BYTE * m_pStrings; ///< string pool; base for offset of string attributes
4201 const CSphAttrLocator m_tLocator; ///< string attribute to fix
4202
ExprSortStringAttrFixup_cExprSortStringAttrFixup_c4203 explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
4204 : m_pStrings ( NULL )
4205 , m_tLocator ( tLocator )
4206 {
4207 }
4208
EvalExprSortStringAttrFixup_c4209 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
4210
Int64EvalExprSortStringAttrFixup_c4211 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4212 {
4213 SphAttr_t uOff = tMatch.GetAttr ( m_tLocator );
4214 return (int64_t)( m_pStrings && uOff ? m_pStrings + uOff : NULL );
4215 }
4216
CommandExprSortStringAttrFixup_c4217 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4218 {
4219 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4220 m_pStrings = (const BYTE*)pArg;
4221 }
4222 };
4223
4224
4225 // expression that transform string pool base + offset -> ptr
4226 struct ExprSortJson2StringPtr_c : public ISphExpr
4227 {
4228 const BYTE * m_pStrings; ///< string pool; base for offset of string attributes
4229 const CSphAttrLocator m_tJsonCol; ///< JSON attribute to fix
4230 CSphRefcountedPtr<ISphExpr> m_pExpr;
4231
ExprSortJson2StringPtr_cExprSortJson2StringPtr_c4232 ExprSortJson2StringPtr_c ( const CSphAttrLocator & tLocator, ISphExpr * pExpr )
4233 : m_pStrings ( NULL )
4234 , m_tJsonCol ( tLocator )
4235 , m_pExpr ( pExpr )
4236 {}
4237
IsStringPtrExprSortJson2StringPtr_c4238 virtual bool IsStringPtr () const { return true; }
4239
EvalExprSortJson2StringPtr_c4240 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
4241
StringEvalExprSortJson2StringPtr_c4242 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
4243 {
4244 if ( !m_pStrings || !m_pExpr )
4245 {
4246 *ppStr = NULL;
4247 return 0;
4248 }
4249
4250 uint64_t uValue = m_pExpr->Int64Eval ( tMatch );
4251 const BYTE * pVal = m_pStrings + ( uValue & 0xffffffff );
4252 ESphJsonType eJson = (ESphJsonType)( uValue >> 32 );
4253 CSphString sVal;
4254
4255 // FIXME!!! make string length configurable for STRING and STRING_VECTOR to compare and allocate only Min(String.Length, CMP_LENGTH)
4256 switch ( eJson )
4257 {
4258 case JSON_INT32:
4259 sVal.SetSprintf ( "%d", sphJsonLoadInt ( &pVal ) );
4260 break;
4261 case JSON_INT64:
4262 sVal.SetSprintf ( INT64_FMT, sphJsonLoadBigint ( &pVal ) );
4263 break;
4264 case JSON_DOUBLE:
4265 sVal.SetSprintf ( "%f", sphQW2D ( sphJsonLoadBigint ( &pVal ) ) );
4266 break;
4267 case JSON_STRING:
4268 {
4269 int iLen = sphJsonUnpackInt ( &pVal );
4270 sVal.SetBinary ( (const char *)pVal, iLen );
4271 break;
4272 }
4273 case JSON_STRING_VECTOR:
4274 {
4275 int iTotalLen = sphJsonUnpackInt ( &pVal );
4276 int iCount = sphJsonUnpackInt ( &pVal );
4277
4278 CSphFixedVector<BYTE> dBuf ( iTotalLen + 4 ); // data and tail GAP
4279 BYTE * pDst = dBuf.Begin();
4280
4281 // head element
4282 if ( iCount )
4283 {
4284 int iElemLen = sphJsonUnpackInt ( &pVal );
4285 memcpy ( pDst, pVal, iElemLen );
4286 pDst += iElemLen;
4287 }
4288
4289 // tail elements separated by space
4290 for ( int i=1; i<iCount; i++ )
4291 {
4292 *pDst++ = ' ';
4293 int iElemLen = sphJsonUnpackInt ( &pVal );
4294 memcpy ( pDst, pVal, iElemLen );
4295 pDst += iElemLen;
4296 }
4297
4298 int iStrLen = pDst-dBuf.Begin();
4299 // filling junk space
4300 while ( pDst<dBuf.Begin()+dBuf.GetLength() )
4301 *pDst++ = '\0';
4302
4303 *ppStr = dBuf.LeakData();
4304 return iStrLen;
4305 }
4306 default:
4307 break;
4308 }
4309
4310 int iStriLen = sVal.Length();
4311 *ppStr = (const BYTE *)sVal.Leak();
4312 return iStriLen;
4313 }
4314
CommandExprSortJson2StringPtr_c4315 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4316 {
4317 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4318 {
4319 m_pStrings = (const BYTE*)pArg;
4320 if ( m_pExpr.Ptr() )
4321 m_pExpr->Command ( eCmd, pArg );
4322 }
4323 }
4324 };
4325
4326
4327
sphIsSortStringInternal(const char * sColumnName)4328 bool sphIsSortStringInternal ( const char * sColumnName )
4329 {
4330 assert ( sColumnName );
4331 return ( strncmp ( sColumnName, g_sIntAttrPrefix, sizeof(g_sIntAttrPrefix)-1 )==0 );
4332 }
4333
4334
4335 // only STRING ( static packed ) and JSON fields mush be remapped
SetupSortRemap(CSphRsetSchema & tSorterSchema,CSphMatchComparatorState & tState)4336 static void SetupSortRemap ( CSphRsetSchema & tSorterSchema, CSphMatchComparatorState & tState )
4337 {
4338 #ifndef NDEBUG
4339 int iColWasCount = tSorterSchema.GetAttrsCount();
4340 #endif
4341 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
4342 {
4343 if ( !( tState.m_eKeypart[i]==SPH_KEYPART_STRING || tState.m_tSubKeys[i].m_sKey.cstr() ) )
4344 continue;
4345
4346 assert ( tState.m_dAttrs[i]>=0 && tState.m_dAttrs[i]<iColWasCount );
4347
4348 bool bIsJson = !tState.m_tSubKeys[i].m_sKey.IsEmpty();
4349 bool bIsFunc = bIsJson && tState.m_tSubKeys[i].m_uMask==0;
4350
4351 CSphString sRemapCol;
4352 sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, bIsJson
4353 ? tState.m_tSubKeys[i].m_sKey.cstr()
4354 : tSorterSchema.GetAttr ( tState.m_dAttrs[i] ).m_sName.cstr() );
4355
4356 int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
4357 if ( iRemap==-1 && bIsJson )
4358 {
4359 CSphString sRemapLowercase = sRemapCol;
4360 sRemapLowercase.ToLower();
4361 iRemap = tSorterSchema.GetAttrIndex ( sRemapLowercase.cstr() );
4362 }
4363
4364 if ( iRemap==-1 )
4365 {
4366 CSphColumnInfo tRemapCol ( sRemapCol.cstr(), bIsJson ? SPH_ATTR_STRINGPTR : SPH_ATTR_BIGINT );
4367 tRemapCol.m_eStage = SPH_EVAL_PRESORT;
4368 if ( bIsJson )
4369 tRemapCol.m_pExpr = bIsFunc ? tState.m_tSubExpr[i] : new ExprSortJson2StringPtr_c ( tState.m_tLocator[i], tState.m_tSubExpr[i] );
4370 else
4371 tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( tState.m_tLocator[i] );
4372
4373 if ( bIsFunc )
4374 {
4375 tRemapCol.m_eAttrType = tState.m_tSubType[i];
4376 tState.m_eKeypart[i] = Attr2Keypart ( tState.m_tSubType[i] );
4377 }
4378
4379 iRemap = tSorterSchema.GetAttrsCount();
4380 tSorterSchema.AddDynamicAttr ( tRemapCol );
4381 }
4382 tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
4383 }
4384 }
4385
4386
sphSortGetStringRemap(const ISphSchema & tSorterSchema,const ISphSchema & tIndexSchema,CSphVector<SphStringSorterRemap_t> & dAttrs)4387 bool sphSortGetStringRemap ( const ISphSchema & tSorterSchema, const ISphSchema & tIndexSchema,
4388 CSphVector<SphStringSorterRemap_t> & dAttrs )
4389 {
4390 dAttrs.Resize ( 0 );
4391 for ( int i=0; i<tSorterSchema.GetAttrsCount(); i++ )
4392 {
4393 const CSphColumnInfo & tDst = tSorterSchema.GetAttr(i);
4394 // remap only static strings
4395 if ( !tDst.m_sName.Begins ( g_sIntAttrPrefix ) || tDst.m_eAttrType==SPH_ATTR_STRINGPTR )
4396 continue;
4397
4398 const CSphColumnInfo * pSrcCol = tIndexSchema.GetAttr ( tDst.m_sName.cstr()+sizeof(g_sIntAttrPrefix)-1 );
4399 if ( !pSrcCol ) // skip internal attributes received from agents
4400 continue;
4401
4402 SphStringSorterRemap_t & tRemap = dAttrs.Add();
4403 tRemap.m_tSrc = pSrcCol->m_tLocator;
4404 tRemap.m_tDst = tDst.m_tLocator;
4405 }
4406 return ( dAttrs.GetLength()>0 );
4407 }
4408
4409 ////////////////////
4410 // BINARY COLLATION
4411 ////////////////////
4412
sphCollateBinary(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4413 int sphCollateBinary ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4414 {
4415 if ( bPacked )
4416 {
4417 int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4418 int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4419 int iRes = memcmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
4420 return iRes ? iRes : ( iLen1-iLen2 );
4421 } else
4422 {
4423 return strcmp ( (const char *)pStr1, (const char *)pStr2 );
4424 }
4425 }
4426
4427 ///////////////////////////////
4428 // LIBC_CI, LIBC_CS COLLATIONS
4429 ///////////////////////////////
4430
4431 /// libc_ci, wrapper for strcasecmp
sphCollateLibcCI(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4432 int sphCollateLibcCI ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4433 {
4434 if ( bPacked )
4435 {
4436 int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4437 int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4438 int iRes = strncasecmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
4439 return iRes ? iRes : ( iLen1-iLen2 );
4440 } else
4441 {
4442 return strcasecmp ( (const char *)pStr1, (const char *)pStr2 );
4443 }
4444 }
4445
4446
4447 /// libc_cs, wrapper for strcoll
sphCollateLibcCS(const BYTE * pStr1,const BYTE * pStr2,bool bPacked)4448 int sphCollateLibcCS ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
4449 {
4450 #define COLLATE_STACK_BUFFER 1024
4451
4452 if ( bPacked )
4453 {
4454 int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
4455 int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
4456
4457 // strcoll wants asciiz strings, so we would have to copy them over
4458 // lets use stack buffer for smaller ones, and allocate from heap for bigger ones
4459 int iRes = 0;
4460 int iLen = Min ( iLen1, iLen2 );
4461 if ( iLen<COLLATE_STACK_BUFFER )
4462 {
4463 // small strings on stack
4464 BYTE sBuf1[COLLATE_STACK_BUFFER];
4465 BYTE sBuf2[COLLATE_STACK_BUFFER];
4466
4467 memcpy ( sBuf1, pStr1, iLen );
4468 memcpy ( sBuf2, pStr2, iLen );
4469 sBuf1[iLen] = sBuf2[iLen] = '\0';
4470 iRes = strcoll ( (const char*)sBuf1, (const char*)sBuf2 );
4471 } else
4472 {
4473 // big strings on heap
4474 char * pBuf1 = new char [ iLen ];
4475 char * pBuf2 = new char [ iLen ];
4476
4477 memcpy ( pBuf1, pStr1, iLen );
4478 memcpy ( pBuf2, pStr2, iLen );
4479 pBuf1[iLen] = pBuf2[iLen] = '\0';
4480 iRes = strcoll ( (const char*)pBuf1, (const char*)pBuf2 );
4481
4482 SafeDeleteArray ( pBuf2 );
4483 SafeDeleteArray ( pBuf1 );
4484 }
4485
4486 return iRes ? iRes : ( iLen1-iLen2 );
4487 } else
4488 {
4489 return strcoll ( (const char *)pStr1, (const char *)pStr2 );
4490 }
4491 }
4492
4493 /////////////////////////////
4494 // UTF8_GENERAL_CI COLLATION
4495 /////////////////////////////
4496
4497 /// 1st level LUT
4498 static unsigned short * g_dCollPlanes_UTF8CI[0x100];
4499
4500 /// 2nd level LUT, non-trivial collation data
4501 static unsigned short g_dCollWeights_UTF8CI[0xb00] =
4502 {
4503 // weights for 0x0 to 0x5ff
4504 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4505 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4506 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
4507 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
4508 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4509 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
4510 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4511 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127,
4512 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
4513 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
4514 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
4515 176, 177, 178, 179, 180, 924, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
4516 65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
4517 208, 78, 79, 79, 79, 79, 79, 215, 216, 85, 85, 85, 85, 89, 222, 83,
4518 65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
4519 208, 78, 79, 79, 79, 79, 79, 247, 216, 85, 85, 85, 85, 89, 222, 89,
4520 65, 65, 65, 65, 65, 65, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
4521 272, 272, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 71, 71, 71, 71,
4522 71, 71, 71, 71, 72, 72, 294, 294, 73, 73, 73, 73, 73, 73, 73, 73,
4523 73, 73, 306, 306, 74, 74, 75, 75, 312, 76, 76, 76, 76, 76, 76, 319,
4524 319, 321, 321, 78, 78, 78, 78, 78, 78, 329, 330, 330, 79, 79, 79, 79,
4525 79, 79, 338, 338, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83,
4526 83, 83, 84, 84, 84, 84, 358, 358, 85, 85, 85, 85, 85, 85, 85, 85,
4527 85, 85, 85, 85, 87, 87, 89, 89, 89, 90, 90, 90, 90, 90, 90, 83,
4528 384, 385, 386, 386, 388, 388, 390, 391, 391, 393, 394, 395, 395, 397, 398, 399,
4529 400, 401, 401, 403, 404, 502, 406, 407, 408, 408, 410, 411, 412, 413, 414, 415,
4530 79, 79, 418, 418, 420, 420, 422, 423, 423, 425, 426, 427, 428, 428, 430, 85,
4531 85, 433, 434, 435, 435, 437, 437, 439, 440, 440, 442, 443, 444, 444, 446, 503,
4532 448, 449, 450, 451, 452, 452, 452, 455, 455, 455, 458, 458, 458, 65, 65, 73,
4533 73, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 398, 65, 65,
4534 65, 65, 198, 198, 484, 484, 71, 71, 75, 75, 79, 79, 79, 79, 439, 439,
4535 74, 497, 497, 497, 71, 71, 502, 503, 78, 78, 65, 65, 198, 198, 216, 216,
4536 65, 65, 65, 65, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
4537 82, 82, 82, 82, 85, 85, 85, 85, 83, 83, 84, 84, 540, 540, 72, 72,
4538 544, 545, 546, 546, 548, 548, 65, 65, 69, 69, 79, 79, 79, 79, 79, 79,
4539 79, 79, 89, 89, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
4540 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591,
4541 592, 593, 594, 385, 390, 597, 393, 394, 600, 399, 602, 400, 604, 605, 606, 607,
4542 403, 609, 610, 404, 612, 613, 614, 615, 407, 406, 618, 619, 620, 621, 622, 412,
4543 624, 625, 413, 627, 628, 415, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639,
4544 422, 641, 642, 425, 644, 645, 646, 647, 430, 649, 433, 434, 652, 653, 654, 655,
4545 656, 657, 439, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671,
4546 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687,
4547 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703,
4548 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719,
4549 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
4550 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751,
4551 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767,
4552 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
4553 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799,
4554 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815,
4555 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
4556 832, 833, 834, 835, 836, 921, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847,
4557 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863,
4558 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
4559 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895,
4560 896, 897, 898, 899, 900, 901, 913, 903, 917, 919, 921, 907, 927, 909, 933, 937,
4561 921, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
4562 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 921, 933, 913, 917, 919, 921,
4563 933, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
4564 928, 929, 931, 931, 932, 933, 934, 935, 936, 937, 921, 933, 927, 933, 937, 975,
4565 914, 920, 978, 978, 978, 934, 928, 983, 984, 985, 986, 986, 988, 988, 990, 990,
4566 992, 992, 994, 994, 996, 996, 998, 998, 1000, 1000, 1002, 1002, 1004, 1004, 1006, 1006,
4567 922, 929, 931, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
4568 1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
4569 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
4570 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
4571 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
4572 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
4573 1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
4574 1120, 1120, 1122, 1122, 1124, 1124, 1126, 1126, 1128, 1128, 1130, 1130, 1132, 1132, 1134, 1134,
4575 1136, 1136, 1138, 1138, 1140, 1140, 1140, 1140, 1144, 1144, 1146, 1146, 1148, 1148, 1150, 1150,
4576 1152, 1152, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1164, 1166, 1166,
4577 1168, 1168, 1170, 1170, 1172, 1172, 1174, 1174, 1176, 1176, 1178, 1178, 1180, 1180, 1182, 1182,
4578 1184, 1184, 1186, 1186, 1188, 1188, 1190, 1190, 1192, 1192, 1194, 1194, 1196, 1196, 1198, 1198,
4579 1200, 1200, 1202, 1202, 1204, 1204, 1206, 1206, 1208, 1208, 1210, 1210, 1212, 1212, 1214, 1214,
4580 1216, 1046, 1046, 1219, 1219, 1221, 1222, 1223, 1223, 1225, 1226, 1227, 1227, 1229, 1230, 1231,
4581 1040, 1040, 1040, 1040, 1236, 1236, 1045, 1045, 1240, 1240, 1240, 1240, 1046, 1046, 1047, 1047,
4582 1248, 1248, 1048, 1048, 1048, 1048, 1054, 1054, 1256, 1256, 1256, 1256, 1069, 1069, 1059, 1059,
4583 1059, 1059, 1059, 1059, 1063, 1063, 1270, 1271, 1067, 1067, 1274, 1275, 1276, 1277, 1278, 1279,
4584 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295,
4585 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311,
4586 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327,
4587 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
4588 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
4589 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375,
4590 1376, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
4591 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
4592 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423,
4593 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439,
4594 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455,
4595 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
4596 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487,
4597 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
4598 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519,
4599 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535,
4600
4601 // weights for codepoints 0x1e00 to 0x1fff
4602 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68,
4603 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
4604 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73,
4605 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77,
4606 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79,
4607 79, 79, 79, 79, 80, 80, 80, 80, 82, 82, 82, 82, 82, 82, 82, 82,
4608 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84,
4609 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86,
4610 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 89, 89,
4611 90, 90, 90, 90, 90, 90, 72, 84, 87, 89, 7834, 83, 7836, 7837, 7838, 7839,
4612 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
4613 65, 65, 65, 65, 65, 65, 65, 65, 69, 69, 69, 69, 69, 69, 69, 69,
4614 69, 69, 69, 69, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
4615 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
4616 79, 79, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
4617 85, 85, 89, 89, 89, 89, 89, 89, 89, 89, 7930, 7931, 7932, 7933, 7934, 7935,
4618 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
4619 917, 917, 917, 917, 917, 917, 7958, 7959, 917, 917, 917, 917, 917, 917, 7966, 7967,
4620 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
4621 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921,
4622 927, 927, 927, 927, 927, 927, 8006, 8007, 927, 927, 927, 927, 927, 927, 8014, 8015,
4623 933, 933, 933, 933, 933, 933, 933, 933, 8024, 933, 8026, 933, 8028, 933, 8030, 933,
4624 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
4625 913, 8123, 917, 8137, 919, 8139, 921, 8155, 927, 8185, 933, 8171, 937, 8187, 8062, 8063,
4626 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
4627 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
4628 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
4629 913, 913, 913, 913, 913, 8117, 913, 913, 913, 913, 913, 8123, 913, 8125, 921, 8127,
4630 8128, 8129, 919, 919, 919, 8133, 919, 919, 917, 8137, 919, 8139, 919, 8141, 8142, 8143,
4631 921, 921, 921, 8147, 8148, 8149, 921, 921, 921, 921, 921, 8155, 8156, 8157, 8158, 8159,
4632 933, 933, 933, 8163, 929, 929, 933, 933, 933, 933, 933, 8171, 929, 8173, 8174, 8175,
4633 8176, 8177, 937, 937, 937, 8181, 937, 937, 927, 8185, 937, 8187, 937, 8189, 8190, 8191
4634
4635 // space for codepoints 0x21xx, 0x24xx, 0xffxx (generated)
4636 };
4637
4638
4639 /// initialize collation LUTs
sphCollationInit()4640 void sphCollationInit()
4641 {
4642 const int dWeightPlane[0x0b] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x1e, 0x1f, 0x21, 0x24, 0xff };
4643
4644 // generate missing weights
4645 for ( int i=0; i<0x100; i++ )
4646 {
4647 g_dCollWeights_UTF8CI[i+0x800] = (unsigned short)( 0x2100 + i - ( i>=0x70 && i<=0x7f )*16 ); // 2170..217f, -16
4648 g_dCollWeights_UTF8CI[i+0x900] = (unsigned short)( 0x2400 + i - ( i>=0xd0 && i<=0xe9 )*26 ); // 24d0..24e9, -26
4649 g_dCollWeights_UTF8CI[i+0xa00] = (unsigned short)( 0xff00 + i - ( i>=0x41 && i<=0x5a )*32 ); // ff41..ff5a, -32
4650 }
4651
4652 // generate planes table
4653 for ( int i=0; i<0x100; i++ )
4654 g_dCollPlanes_UTF8CI[i] = NULL;
4655
4656 for ( int i=0; i<0x0b; i++ )
4657 g_dCollPlanes_UTF8CI [ dWeightPlane[i] ] = g_dCollWeights_UTF8CI + 0x100*i;
4658 }
4659
4660
4661 /// collate a single codepoint
CollateUTF8CI(int iCode)4662 static inline int CollateUTF8CI ( int iCode )
4663 {
4664 return ( ( iCode>>16 ) || !g_dCollPlanes_UTF8CI [ iCode>>8 ] )
4665 ? iCode
4666 : g_dCollPlanes_UTF8CI [ iCode>>8 ][ iCode&0xff ];
4667 }
4668
4669
4670 /// utf8_general_ci
sphCollateUtf8GeneralCI(const BYTE * pArg1,const BYTE * pArg2,bool bPacked)4671 int sphCollateUtf8GeneralCI ( const BYTE * pArg1, const BYTE * pArg2, bool bPacked )
4672 {
4673 const BYTE * pStr1 = pArg1;
4674 const BYTE * pStr2 = pArg2;
4675 const BYTE * pMax1 = NULL;
4676 const BYTE * pMax2 = NULL;
4677 if ( bPacked )
4678 {
4679 int iLen1 = sphUnpackStr ( pStr1, (const BYTE**)&pStr1 );
4680 int iLen2 = sphUnpackStr ( pStr2, (const BYTE**)&pStr2 );
4681 pMax1 = pStr1 + iLen1;
4682 pMax2 = pStr2 + iLen2;
4683 }
4684
4685 while ( ( bPacked && pStr1<pMax1 && pStr2<pMax2 ) || ( !bPacked && *pStr1 && *pStr2 ) )
4686 {
4687 // FIXME! on broken data, decode might go beyond buffer bounds
4688 int iCode1 = sphUTF8Decode ( pStr1 );
4689 int iCode2 = sphUTF8Decode ( pStr2 );
4690 if ( !iCode1 && !iCode2 )
4691 return 0;
4692 if ( !iCode1 || !iCode2 )
4693 return !iCode1 ? -1 : 1;
4694
4695 if ( iCode1==iCode2 )
4696 continue;
4697 iCode1 = CollateUTF8CI ( iCode1 );
4698 iCode2 = CollateUTF8CI ( iCode2 );
4699 if ( iCode1!=iCode2 )
4700 return iCode1-iCode2;
4701 }
4702
4703 if ( bPacked )
4704 {
4705 if ( pStr1>=pMax1 && pStr2>=pMax2 )
4706 return 0;
4707 return ( pStr1<pMax1 ) ? 1 : -1;
4708 } else
4709 {
4710 if ( !*pStr1 && !*pStr2 )
4711 return 0;
4712 return ( *pStr1 ? 1 : -1 );
4713 }
4714 }
4715
4716
4717 /////////////////////////////
4718 // hashing functions
4719 /////////////////////////////
4720
4721
4722 class LibcCSHash_fn
4723 {
4724 public:
4725 mutable CSphTightVector<BYTE> m_dBuf;
4726 static const int LOCALE_SAFE_GAP = 16;
4727
LibcCSHash_fn()4728 LibcCSHash_fn()
4729 {
4730 m_dBuf.Resize ( COLLATE_STACK_BUFFER );
4731 }
4732
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4733 uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4734 {
4735 assert ( pStr && iLen );
4736
4737 int iCompositeLen = iLen + 1 + (int)( 3.0f * iLen ) + LOCALE_SAFE_GAP;
4738 if ( m_dBuf.GetLength()<iCompositeLen )
4739 m_dBuf.Resize ( iCompositeLen );
4740
4741 memcpy ( m_dBuf.Begin(), pStr, iLen );
4742 m_dBuf[iLen] = '\0';
4743
4744 BYTE * pDst = m_dBuf.Begin()+iLen+1;
4745 int iDstAvailable = m_dBuf.GetLength() - iLen - LOCALE_SAFE_GAP;
4746
4747 int iDstLen = strxfrm ( (char *)pDst, (const char *)m_dBuf.Begin(), iDstAvailable );
4748 assert ( iDstLen<iDstAvailable+LOCALE_SAFE_GAP );
4749
4750 uint64_t uAcc = sphFNV64 ( pDst, iDstLen, uPrev );
4751
4752 return uAcc;
4753 }
4754 };
4755
4756
4757 class LibcCIHash_fn
4758 {
4759 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4760 uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4761 {
4762 assert ( pStr && iLen );
4763
4764 uint64_t uAcc = uPrev;
4765 while ( iLen-- )
4766 {
4767 int iChar = tolower ( *pStr++ );
4768 uAcc = sphFNV64 ( &iChar, 4, uAcc );
4769 }
4770
4771 return uAcc;
4772 }
4773 };
4774
4775
4776 class Utf8CIHash_fn
4777 {
4778 public:
Hash(const BYTE * pStr,int iLen,uint64_t uPrev=SPH_FNV64_SEED) const4779 uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
4780 {
4781 assert ( pStr && iLen );
4782
4783 uint64_t uAcc = uPrev;
4784 while ( iLen-- )
4785 {
4786 const BYTE * pCur = pStr++;
4787 int iCode = sphUTF8Decode ( pCur );
4788 iCode = CollateUTF8CI ( iCode );
4789 uAcc = sphFNV64 ( &iCode, 4, uAcc );
4790 }
4791
4792 return uAcc;
4793 }
4794 };
4795
4796
sphCreateGrouperString(const CSphAttrLocator & tLoc,ESphCollation eCollation)4797 CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation )
4798 {
4799 if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
4800 return new CSphGrouperString<Utf8CIHash_fn> ( tLoc );
4801 else if ( eCollation==SPH_COLLATION_LIBC_CI )
4802 return new CSphGrouperString<LibcCIHash_fn> ( tLoc );
4803 else if ( eCollation==SPH_COLLATION_LIBC_CS )
4804 return new CSphGrouperString<LibcCSHash_fn> ( tLoc );
4805 else
4806 return new CSphGrouperString<BinaryHash_fn> ( tLoc );
4807 }
4808
sphCreateGrouperMulti(const CSphVector<CSphAttrLocator> & dLocators,const CSphVector<ESphAttr> & dAttrTypes,const CSphVector<ISphExpr * > & dJsonKeys,ESphCollation eCollation)4809 CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
4810 const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation )
4811 {
4812 if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
4813 return new CSphGrouperMulti<Utf8CIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4814 else if ( eCollation==SPH_COLLATION_LIBC_CI )
4815 return new CSphGrouperMulti<LibcCIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4816 else if ( eCollation==SPH_COLLATION_LIBC_CS )
4817 return new CSphGrouperMulti<LibcCSHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4818 else
4819 return new CSphGrouperMulti<BinaryHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
4820 }
4821
4822
4823 /////////////////////////
4824 // SORTING QUEUE FACTORY
4825 /////////////////////////
4826
4827 template < typename COMP >
CreatePlainSorter(bool bKbuffer,int iMaxMatches,bool bUsesAttrs,bool bFactors)4828 static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
4829 {
4830 if ( bKbuffer )
4831 {
4832 if ( bFactors )
4833 return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
4834 else
4835 return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
4836 } else
4837 {
4838 if ( bFactors )
4839 return new CSphMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
4840 else
4841 return new CSphMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
4842 }
4843 }
4844
4845
CreatePlainSorter(ESphSortFunc eMatchFunc,bool bKbuffer,int iMaxMatches,bool bUsesAttrs,bool bFactors)4846 static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
4847 {
4848 switch ( eMatchFunc )
4849 {
4850 case FUNC_REL_DESC: return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4851 case FUNC_ATTR_DESC: return CreatePlainSorter<MatchAttrLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4852 case FUNC_ATTR_ASC: return CreatePlainSorter<MatchAttrGt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4853 case FUNC_TIMESEGS: return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4854 case FUNC_GENERIC2: return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4855 case FUNC_GENERIC3: return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4856 case FUNC_GENERIC4: return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4857 case FUNC_GENERIC5: return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4858 case FUNC_EXPR: return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
4859 default: return NULL;
4860 }
4861 }
4862
4863
ExtraAddSortkeys(CSphSchema * pExtra,const ISphSchema & tSorterSchema,const int * dAttrs)4864 static void ExtraAddSortkeys ( CSphSchema * pExtra, const ISphSchema & tSorterSchema, const int * dAttrs )
4865 {
4866 if ( pExtra )
4867 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
4868 if ( dAttrs[i]>=0 )
4869 pExtra->AddAttr ( tSorterSchema.GetAttr ( dAttrs[i] ), true );
4870 }
4871
4872
sphCreateQueue(SphQueueSettings_t & tQueue)4873 ISphMatchSorter * sphCreateQueue ( SphQueueSettings_t & tQueue )
4874 {
4875 // prepare for descent
4876 ISphMatchSorter * pTop = NULL;
4877 CSphMatchComparatorState tStateMatch, tStateGroup;
4878
4879 // short-cuts
4880 const CSphQuery * pQuery = &tQueue.m_tQuery;
4881 const ISphSchema & tSchema = tQueue.m_tSchema;
4882 CSphString & sError = tQueue.m_sError;
4883 CSphQueryProfile * pProfiler = tQueue.m_pProfiler;
4884 CSphSchema * pExtra = tQueue.m_pExtra;
4885
4886 sError = "";
4887 bool bHasZonespanlist = false;
4888 bool bNeedZonespanlist = false;
4889 DWORD uPackedFactorFlags = SPH_FACTOR_DISABLE;
4890
4891 ///////////////////////////////////////
4892 // build incoming and outgoing schemas
4893 ///////////////////////////////////////
4894
4895 // sorter schema
4896 // adds computed expressions and groupby stuff on top of the original index schema
4897 CSphRsetSchema tSorterSchema;
4898 tSorterSchema = tSchema;
4899
4900 CSphVector<uint64_t> dQueryAttrs;
4901
4902 // we need this to perform a sanity check
4903 bool bHasGroupByExpr = false;
4904
4905 // setup overrides, detach them into dynamic part
4906 ARRAY_FOREACH ( i, pQuery->m_dOverrides )
4907 {
4908 const char * sAttr = pQuery->m_dOverrides[i].m_sAttr.cstr();
4909
4910 int iIndex = tSorterSchema.GetAttrIndex ( sAttr );
4911 if ( iIndex<0 )
4912 {
4913 sError.SetSprintf ( "override attribute '%s' not found", sAttr );
4914 return NULL;
4915 }
4916
4917 CSphColumnInfo tCol;
4918 tCol = tSorterSchema.GetAttr ( iIndex );
4919 tCol.m_eStage = SPH_EVAL_OVERRIDE;
4920 tSorterSchema.AddDynamicAttr ( tCol );
4921 if ( pExtra )
4922 pExtra->AddAttr ( tCol, true );
4923 tSorterSchema.RemoveStaticAttr ( iIndex );
4924
4925 dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4926 }
4927
4928 // setup @geodist
4929 if ( pQuery->m_bGeoAnchor && tSorterSchema.GetAttrIndex ( "@geodist" )<0 )
4930 {
4931 ExprGeodist_t * pExpr = new ExprGeodist_t ();
4932 if ( !pExpr->Setup ( pQuery, tSorterSchema, sError ) )
4933 {
4934 pExpr->Release ();
4935 return NULL;
4936 }
4937 CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
4938 tCol.m_pExpr = pExpr; // takes ownership, no need to for explicit pExpr release
4939 tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
4940 tSorterSchema.AddDynamicAttr ( tCol );
4941 if ( pExtra )
4942 pExtra->AddAttr ( tCol, true );
4943
4944 dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4945 }
4946
4947 // setup @expr
4948 if ( pQuery->m_eSort==SPH_SORT_EXPR && tSorterSchema.GetAttrIndex ( "@expr" )<0 )
4949 {
4950 CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility (ie. too lazy to fix those tests right now)
4951 tCol.m_pExpr = sphExprParse ( pQuery->m_sSortBy.cstr(), tSorterSchema, NULL, NULL, sError, pProfiler, pQuery->m_eCollation, NULL, &bHasZonespanlist );
4952 bNeedZonespanlist |= bHasZonespanlist;
4953 if ( !tCol.m_pExpr )
4954 return NULL;
4955 tCol.m_eStage = SPH_EVAL_PRESORT;
4956 tSorterSchema.AddDynamicAttr ( tCol );
4957
4958 dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
4959 }
4960
4961 // expressions from select items
4962 bool bHasCount = false;
4963
4964 if ( tQueue.m_bComputeItems )
4965 ARRAY_FOREACH ( iItem, pQuery->m_dItems )
4966 {
4967 const CSphQueryItem & tItem = pQuery->m_dItems[iItem];
4968 const CSphString & sExpr = tItem.m_sExpr;
4969 bool bIsCount = IsCount(sExpr);
4970 bHasCount |= bIsCount;
4971
4972 if ( sExpr=="*" )
4973 {
4974 for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
4975 dQueryAttrs.Add ( sphFNV64 ( tSchema.GetAttr(i).m_sName.cstr() ) );
4976 }
4977
4978 // for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
4979 int iAttrIdx = tSchema.GetAttrIndex ( sExpr.cstr() );
4980 bool bPlainAttr = ( ( sExpr=="*" || ( iAttrIdx>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE ) ) &&
4981 ( tItem.m_sAlias.IsEmpty() || tItem.m_sAlias==tItem.m_sExpr ) );
4982 if ( iAttrIdx>=0 )
4983 {
4984 ESphAttr eAttr = tSchema.GetAttr ( iAttrIdx ).m_eAttrType;
4985 if ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET )
4986 {
4987 if ( tItem.m_eAggrFunc!=SPH_AGGR_NONE )
4988 {
4989 sError.SetSprintf ( "can not aggregate non-scalar attribute '%s'", tItem.m_sExpr.cstr() );
4990 return NULL;
4991 }
4992
4993 if ( !bPlainAttr )
4994 {
4995 bPlainAttr = true;
4996 for ( int i=0; i<iItem && bPlainAttr; i++ )
4997 if ( sExpr==pQuery->m_dItems[i].m_sAlias )
4998 bPlainAttr = false;
4999 }
5000 }
5001 }
5002
5003 if ( bPlainAttr || IsGroupby ( sExpr ) || bIsCount )
5004 {
5005 bHasGroupByExpr = IsGroupby ( sExpr );
5006 continue;
5007 }
5008
5009 // not an attribute? must be an expression, and must be aliased by query parser
5010 assert ( !tItem.m_sAlias.IsEmpty() );
5011
5012 // tricky part
5013 // we might be fed with precomputed matches, but it's all or nothing
5014 // the incoming match either does not have anything computed, or it has everything
5015 int iSorterAttr = tSorterSchema.GetAttrIndex ( tItem.m_sAlias.cstr() );
5016 if ( iSorterAttr>=0 )
5017 {
5018 if ( dQueryAttrs.Contains ( sphFNV64 ( tItem.m_sAlias.cstr() ) ) )
5019 {
5020 sError.SetSprintf ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
5021 return NULL;
5022 } else
5023 {
5024 tSorterSchema.RemoveStaticAttr ( iSorterAttr );
5025 }
5026 }
5027
5028 // a new and shiny expression, lets parse
5029 CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
5030 DWORD uQueryPackedFactorFlags = SPH_FACTOR_DISABLE;
5031
5032 // tricky bit
5033 // GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
5034 // and then the aggregate operation simply concatenates strings as matches arrive
5035 // ideally, we would instead pass ownership of the expression to G_C() implementation
5036 // and also the original expression type, and let the string conversion happen in G_C() itself
5037 // but that ideal route seems somewhat more complicated in the current architecture
5038 if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
5039 {
5040 CSphString sExpr2;
5041 sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
5042 tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
5043 &tExprCol.m_bWeight, sError, pProfiler, pQuery->m_eCollation, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
5044 } else
5045 {
5046 tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
5047 &tExprCol.m_bWeight, sError, pProfiler, pQuery->m_eCollation, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
5048 }
5049
5050 uPackedFactorFlags |= uQueryPackedFactorFlags;
5051 bNeedZonespanlist |= bHasZonespanlist;
5052 tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
5053 if ( !tExprCol.m_pExpr )
5054 {
5055 sError.SetSprintf ( "parse error: %s", sError.cstr() );
5056 return NULL;
5057 }
5058
5059 // force AVG() to be computed in floats
5060 if ( tExprCol.m_eAggrFunc==SPH_AGGR_AVG )
5061 {
5062 tExprCol.m_eAttrType = SPH_ATTR_FLOAT;
5063 tExprCol.m_tLocator.m_iBitCount = 32;
5064 }
5065
5066 // force explicit type conversion for JSON attributes
5067 if ( tExprCol.m_eAggrFunc!=SPH_AGGR_NONE && tExprCol.m_eAttrType==SPH_ATTR_JSON_FIELD )
5068 {
5069 sError.SetSprintf ( "ambiguous attribute type '%s', use INTEGER(), BIGINT() or DOUBLE() conversion functions", tItem.m_sExpr.cstr() );
5070 return NULL;
5071 }
5072
5073 if ( uQueryPackedFactorFlags & SPH_FACTOR_JSON_OUT )
5074 tExprCol.m_eAttrType = SPH_ATTR_FACTORS_JSON;
5075
5076 // force GROUP_CONCAT() to be computed as strings
5077 if ( tExprCol.m_eAggrFunc==SPH_AGGR_CAT )
5078 {
5079 tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
5080 tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
5081 }
5082
5083 // postpone aggregates, add non-aggregates
5084 if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
5085 {
5086 // is this expression used in filter?
5087 // OPTIMIZE? hash filters and do hash lookups?
5088 if ( tExprCol.m_eAttrType!=SPH_ATTR_JSON_FIELD )
5089 ARRAY_FOREACH ( i, pQuery->m_dFilters )
5090 if ( pQuery->m_dFilters[i].m_sAttrName==tExprCol.m_sName )
5091 {
5092 // is this a hack?
5093 // m_bWeight is computed after EarlyReject() get called
5094 // that means we can't evaluate expressions with WEIGHT() in prefilter phase
5095 if ( tExprCol.m_bWeight )
5096 {
5097 tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
5098 break;
5099 }
5100
5101 // so we are about to add a filter condition
5102 // but it might depend on some preceding columns (e.g. SELECT 1+attr f1 ... WHERE f1>5)
5103 // lets detect those and move them to prefilter \ presort phase too
5104 CSphVector<int> dCur;
5105 tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5106
5107 // usual filter
5108 tExprCol.m_eStage = SPH_EVAL_PREFILTER;
5109 ARRAY_FOREACH ( j, dCur )
5110 {
5111 const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
5112 if ( tCol.m_bWeight )
5113 {
5114 tExprCol.m_eStage = SPH_EVAL_PRESORT;
5115 tExprCol.m_bWeight = true;
5116 }
5117 // handle chains of dependencies (e.g. SELECT 1+attr f1, f1-1 f2 ... WHERE f2>5)
5118 if ( tCol.m_pExpr.Ptr() )
5119 {
5120 tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5121 }
5122 }
5123 dCur.Uniq();
5124
5125 ARRAY_FOREACH ( j, dCur )
5126 {
5127 CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
5128 if ( tDep.m_eStage>tExprCol.m_eStage )
5129 tDep.m_eStage = tExprCol.m_eStage;
5130 }
5131 break;
5132 }
5133
5134 // add it!
5135 // NOTE, "final" stage might need to be fixed up later
5136 // we'll do that when parsing sorting clause
5137 tSorterSchema.AddDynamicAttr ( tExprCol );
5138 } else // some aggregate
5139 {
5140 tExprCol.m_eStage = SPH_EVAL_PRESORT; // sorter expects computed expression
5141 tSorterSchema.AddDynamicAttr ( tExprCol );
5142 if ( pExtra )
5143 pExtra->AddAttr ( tExprCol, true );
5144
5145 /// update aggregate dependencies (e.g. SELECT 1+attr f1, min(f1), ...)
5146 CSphVector<int> dCur;
5147 tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5148
5149 ARRAY_FOREACH ( j, dCur )
5150 {
5151 const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
5152 if ( tCol.m_pExpr.Ptr() )
5153 tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
5154 }
5155 dCur.Uniq();
5156
5157 ARRAY_FOREACH ( j, dCur )
5158 {
5159 CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
5160 if ( tDep.m_eStage>tExprCol.m_eStage )
5161 tDep.m_eStage = tExprCol.m_eStage;
5162 }
5163 }
5164 dQueryAttrs.Add ( sphFNV64 ( (const BYTE *)tExprCol.m_sName.cstr() ) );
5165 }
5166
5167 ////////////////////////////////////////////
5168 // setup groupby settings, create shortcuts
5169 ////////////////////////////////////////////
5170
5171 CSphVector<int> dGroupColumns;
5172 CSphGroupSorterSettings tSettings;
5173 bool bImplicit = false;
5174
5175 if ( pQuery->m_sGroupBy.IsEmpty() )
5176 ARRAY_FOREACH_COND ( i, pQuery->m_dItems, !bImplicit )
5177 {
5178 const CSphQueryItem & t = pQuery->m_dItems[i];
5179 bImplicit = ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
5180 }
5181
5182 if ( !SetupGroupbySettings ( pQuery, tSorterSchema, tSettings, dGroupColumns, sError, bImplicit ) )
5183 return NULL;
5184
5185 const bool bGotGroupby = !pQuery->m_sGroupBy.IsEmpty() || tSettings.m_bImplicit; // or else, check in SetupGroupbySettings() would already fail
5186 const bool bGotDistinct = ( tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
5187
5188 if ( bHasGroupByExpr && !bGotGroupby )
5189 {
5190 sError = "GROUPBY() is allowed only in GROUP BY queries";
5191 return NULL;
5192 }
5193
5194 // check for HAVING constrains
5195 if ( tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() )
5196 {
5197 if ( !bGotGroupby )
5198 {
5199 sError.SetSprintf ( "can not use HAVING without GROUP BY" );
5200 return NULL;
5201 }
5202
5203 // should be column named at group by or it's alias or aggregate
5204 const CSphString & sHaving = tQueue.m_pAggrFilter->m_sAttrName;
5205 if ( !IsGroupbyMagic ( sHaving ) )
5206 {
5207 bool bValidHaving = false;
5208 ARRAY_FOREACH ( i, pQuery->m_dItems )
5209 {
5210 const CSphQueryItem & tItem = pQuery->m_dItems[i];
5211 if ( tItem.m_sAlias!=sHaving )
5212 continue;
5213
5214 bValidHaving = ( IsGroupbyMagic ( tItem.m_sExpr ) || tItem.m_eAggrFunc!=SPH_AGGR_NONE );
5215 break;
5216 }
5217
5218 if ( !bValidHaving )
5219 {
5220 sError.SetSprintf ( "can not use HAVING with attribute not related to GROUP BY" );
5221 return NULL;
5222 }
5223 }
5224 }
5225
5226 // now lets add @groupby etc if needed
5227 if ( bGotGroupby && tSorterSchema.GetAttrIndex ( "@groupby" )<0 )
5228 {
5229 ESphAttr eGroupByResult = ( !tSettings.m_bImplicit ) ? tSettings.m_pGrouper->GetResultType() : SPH_ATTR_INTEGER; // implicit do not have grouper
5230 if ( tSettings.m_bMva64 )
5231 eGroupByResult = SPH_ATTR_BIGINT;
5232
5233 CSphColumnInfo tGroupby ( "@groupby", eGroupByResult );
5234 CSphColumnInfo tCount ( "@count", SPH_ATTR_INTEGER );
5235 CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
5236
5237 tGroupby.m_eStage = SPH_EVAL_SORTER;
5238 tCount.m_eStage = SPH_EVAL_SORTER;
5239 tDistinct.m_eStage = SPH_EVAL_SORTER;
5240
5241 tSorterSchema.AddDynamicAttr ( tGroupby );
5242 tSorterSchema.AddDynamicAttr ( tCount );
5243 if ( pExtra )
5244 {
5245 pExtra->AddAttr ( tGroupby, true );
5246 pExtra->AddAttr ( tCount, true );
5247 }
5248 if ( bGotDistinct )
5249 {
5250 tSorterSchema.AddDynamicAttr ( tDistinct );
5251 if ( pExtra )
5252 pExtra->AddAttr ( tDistinct, true );
5253 }
5254
5255 // add @groupbystr last in case we need to skip it on sending (like @int_str2ptr_*)
5256 if ( tSettings.m_bJson )
5257 {
5258 CSphColumnInfo tGroupbyStr ( "@groupbystr", SPH_ATTR_JSON_FIELD );
5259 tGroupbyStr.m_eStage = SPH_EVAL_SORTER;
5260 tSorterSchema.AddDynamicAttr ( tGroupbyStr );
5261 }
5262 }
5263
5264 #define LOC_CHECK(_cond,_msg) if (!(_cond)) { sError = "invalid schema: " _msg; return NULL; }
5265
5266 int iGroupby = tSorterSchema.GetAttrIndex ( "@groupby" );
5267 if ( iGroupby>=0 )
5268 {
5269 tSettings.m_bDistinct = bGotDistinct;
5270 tSettings.m_tLocGroupby = tSorterSchema.GetAttr ( iGroupby ).m_tLocator;
5271 LOC_CHECK ( tSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );
5272
5273 int iCount = tSorterSchema.GetAttrIndex ( "@count" );
5274 LOC_CHECK ( iCount>=0, "missing @count" );
5275
5276 tSettings.m_tLocCount = tSorterSchema.GetAttr ( iCount ).m_tLocator;
5277 LOC_CHECK ( tSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );
5278
5279 int iDistinct = tSorterSchema.GetAttrIndex ( "@distinct" );
5280 if ( bGotDistinct )
5281 {
5282 LOC_CHECK ( iDistinct>=0, "missing @distinct" );
5283 tSettings.m_tLocDistinct = tSorterSchema.GetAttr ( iDistinct ).m_tLocator;
5284 LOC_CHECK ( tSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
5285 } else
5286 {
5287 LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
5288 }
5289
5290 int iGroupbyStr = tSorterSchema.GetAttrIndex ( "@groupbystr" );
5291 if ( iGroupbyStr>=0 )
5292 tSettings.m_tLocGroupbyStr = tSorterSchema.GetAttr ( iGroupbyStr ).m_tLocator;
5293 }
5294
5295 if ( bHasCount )
5296 {
5297 LOC_CHECK ( tSorterSchema.GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );
5298 }
5299
5300 #undef LOC_CHECK
5301
5302 ////////////////////////////////////
5303 // choose and setup sorting functor
5304 ////////////////////////////////////
5305
5306 ESphSortFunc eMatchFunc = FUNC_REL_DESC;
5307 ESphSortFunc eGroupFunc = FUNC_REL_DESC;
5308 bool bUsesAttrs = false;
5309 bool bRandomize = false;
5310
5311 // matches sorting function
5312 if ( pQuery->m_eSort==SPH_SORT_EXTENDED )
5313 {
5314 ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sSortBy.cstr(),
5315 tSorterSchema, eMatchFunc, tStateMatch, sError );
5316
5317 if ( eRes==SORT_CLAUSE_ERROR )
5318 return NULL;
5319
5320 if ( eRes==SORT_CLAUSE_RANDOM )
5321 bRandomize = true;
5322
5323 ExtraAddSortkeys ( pExtra, tSorterSchema, tStateMatch.m_dAttrs );
5324
5325 bUsesAttrs = FixupDependency ( tSorterSchema, tStateMatch.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
5326 if ( !bUsesAttrs )
5327 {
5328 for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
5329 {
5330 ESphSortKeyPart ePart = tStateMatch.m_eKeypart[i];
5331 if ( ePart==SPH_KEYPART_INT || ePart==SPH_KEYPART_FLOAT || ePart==SPH_KEYPART_STRING || ePart==SPH_KEYPART_STRINGPTR )
5332 bUsesAttrs = true;
5333 }
5334 }
5335 SetupSortRemap ( tSorterSchema, tStateMatch );
5336
5337 } else if ( pQuery->m_eSort==SPH_SORT_EXPR )
5338 {
5339 tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
5340 tStateMatch.m_tLocator[0] = tSorterSchema.GetAttr ( tSorterSchema.GetAttrIndex ( "@expr" ) ).m_tLocator;
5341 tStateMatch.m_eKeypart[1] = SPH_KEYPART_ID;
5342 tStateMatch.m_uAttrDesc = 1;
5343 eMatchFunc = FUNC_EXPR;
5344 bUsesAttrs = true;
5345
5346 } else
5347 {
5348 // check sort-by attribute
5349 if ( pQuery->m_eSort!=SPH_SORT_RELEVANCE )
5350 {
5351 int iSortAttr = tSorterSchema.GetAttrIndex ( pQuery->m_sSortBy.cstr() );
5352 if ( iSortAttr<0 )
5353 {
5354 sError.SetSprintf ( "sort-by attribute '%s' not found", pQuery->m_sSortBy.cstr() );
5355 return NULL;
5356 }
5357 const CSphColumnInfo & tAttr = tSorterSchema.GetAttr ( iSortAttr );
5358 tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
5359 tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
5360 tStateMatch.m_dAttrs[0] = iSortAttr;
5361 SetupSortRemap ( tSorterSchema, tStateMatch );
5362 }
5363
5364 // find out what function to use and whether it needs attributes
5365 bUsesAttrs = true;
5366 switch ( pQuery->m_eSort )
5367 {
5368 case SPH_SORT_ATTR_DESC: eMatchFunc = FUNC_ATTR_DESC; break;
5369 case SPH_SORT_ATTR_ASC: eMatchFunc = FUNC_ATTR_ASC; break;
5370 case SPH_SORT_TIME_SEGMENTS: eMatchFunc = FUNC_TIMESEGS; break;
5371 case SPH_SORT_RELEVANCE: eMatchFunc = FUNC_REL_DESC; bUsesAttrs = false; break;
5372 default:
5373 sError.SetSprintf ( "unknown sorting mode %d", pQuery->m_eSort );
5374 return NULL;
5375 }
5376 }
5377
5378 // groups sorting function
5379 if ( bGotGroupby )
5380 {
5381 ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sGroupSortBy.cstr(),
5382 tSorterSchema, eGroupFunc, tStateGroup, sError );
5383
5384 if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
5385 {
5386 if ( eRes==SORT_CLAUSE_RANDOM )
5387 sError.SetSprintf ( "groups can not be sorted by @random" );
5388 return NULL;
5389 }
5390
5391 ExtraAddSortkeys ( pExtra, tSorterSchema, tStateGroup.m_dAttrs );
5392
5393 assert ( dGroupColumns.GetLength() || tSettings.m_bImplicit );
5394 if ( pExtra && !tSettings.m_bImplicit )
5395 {
5396 ARRAY_FOREACH ( i, dGroupColumns )
5397 pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns[i] ), true );
5398 }
5399
5400 if ( bGotDistinct )
5401 {
5402 dGroupColumns.Add ( tSorterSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() ) );
5403 assert ( dGroupColumns.Last()>=0 );
5404 if ( pExtra )
5405 pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns.Last() ), true );
5406 }
5407
5408 if ( dGroupColumns.GetLength() ) // implicit case
5409 {
5410 FixupDependency ( tSorterSchema, dGroupColumns.Begin(), dGroupColumns.GetLength() );
5411 }
5412 FixupDependency ( tSorterSchema, tStateGroup.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
5413
5414 // GroupSortBy str attributes setup
5415 SetupSortRemap ( tSorterSchema, tStateGroup );
5416 }
5417
5418 // set up aggregate filter for grouper
5419 if ( bGotGroupby && tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() )
5420 {
5421 if ( tSorterSchema.GetAttr ( tQueue.m_pAggrFilter->m_sAttrName.cstr() ) )
5422 {
5423 tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, tQueue.m_pAggrFilter->m_sAttrName, tSorterSchema, sError );
5424 } else
5425 {
5426 // having might reference aliased attributes but @* attributes got stored without alias in sorter schema
5427 CSphString sHaving;
5428 ARRAY_FOREACH ( i, pQuery->m_dItems )
5429 {
5430 const CSphQueryItem & tItem = pQuery->m_dItems[i];
5431 if ( tItem.m_sAlias==tQueue.m_pAggrFilter->m_sAttrName )
5432 {
5433 sHaving = tItem.m_sExpr;
5434 break;
5435 }
5436 }
5437
5438 if ( sHaving=="groupby()" )
5439 sHaving = "@groupby";
5440 else if ( sHaving=="count(*)" )
5441 sHaving = "@count";
5442
5443 tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, sHaving, tSorterSchema, sError );
5444 }
5445
5446 if ( !tSettings.m_pAggrFilterTrait )
5447 return NULL;
5448 }
5449
5450 ///////////////////
5451 // spawn the queue
5452 ///////////////////
5453
5454 if ( !bGotGroupby )
5455 {
5456 if ( tQueue.m_pUpdate )
5457 pTop = new CSphUpdateQueue ( pQuery->m_iMaxMatches, tQueue.m_pUpdate, pQuery->m_bIgnoreNonexistent, pQuery->m_bStrict );
5458 else if ( tQueue.m_pDeletes )
5459 pTop = new CSphDeleteQueue ( pQuery->m_iMaxMatches, tQueue.m_pDeletes );
5460 else
5461 pTop = CreatePlainSorter ( eMatchFunc, pQuery->m_bSortKbuffer, pQuery->m_iMaxMatches, bUsesAttrs, uPackedFactorFlags & SPH_FACTOR_ENABLE );
5462 } else
5463 {
5464 pTop = sphCreateSorter1st ( eMatchFunc, eGroupFunc, pQuery, tSettings, uPackedFactorFlags & SPH_FACTOR_ENABLE );
5465 }
5466
5467 if ( !pTop )
5468 {
5469 sError.SetSprintf ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)",
5470 eMatchFunc, bGotGroupby, eGroupFunc );
5471 return NULL;
5472 }
5473
5474 switch ( pQuery->m_eCollation )
5475 {
5476 case SPH_COLLATION_LIBC_CI:
5477 tStateMatch.m_fnStrCmp = sphCollateLibcCI;
5478 tStateGroup.m_fnStrCmp = sphCollateLibcCI;
5479 break;
5480 case SPH_COLLATION_LIBC_CS:
5481 tStateMatch.m_fnStrCmp = sphCollateLibcCS;
5482 tStateGroup.m_fnStrCmp = sphCollateLibcCS;
5483 break;
5484 case SPH_COLLATION_UTF8_GENERAL_CI:
5485 tStateMatch.m_fnStrCmp = sphCollateUtf8GeneralCI;
5486 tStateGroup.m_fnStrCmp = sphCollateUtf8GeneralCI;
5487 break;
5488 case SPH_COLLATION_BINARY:
5489 tStateMatch.m_fnStrCmp = sphCollateBinary;
5490 tStateGroup.m_fnStrCmp = sphCollateBinary;
5491 break;
5492 }
5493
5494 assert ( pTop );
5495 pTop->SetState ( tStateMatch );
5496 pTop->SetGroupState ( tStateGroup );
5497 pTop->SetSchema ( tSorterSchema );
5498 pTop->m_bRandomize = bRandomize;
5499
5500 if ( bRandomize )
5501 {
5502 if ( pQuery->m_iRandSeed>=0 )
5503 sphSrand ( (DWORD)pQuery->m_iRandSeed );
5504 else
5505 sphAutoSrand ();
5506 }
5507
5508 tQueue.m_bZonespanlist = bNeedZonespanlist;
5509 tQueue.m_uPackedFactorFlags = uPackedFactorFlags;
5510
5511 return pTop;
5512 }
5513
5514
sphFlattenQueue(ISphMatchSorter * pQueue,CSphQueryResult * pResult,int iTag)5515 int sphFlattenQueue ( ISphMatchSorter * pQueue, CSphQueryResult * pResult, int iTag )
5516 {
5517 if ( !pQueue || !pQueue->GetLength() )
5518 return 0;
5519
5520 int iOffset = pResult->m_dMatches.GetLength ();
5521 pResult->m_dMatches.Resize ( iOffset + pQueue->GetLength() );
5522 int iCopied = pQueue->Flatten ( pResult->m_dMatches.Begin() + iOffset, iTag );
5523 pResult->m_dMatches.Resize ( iOffset + iCopied );
5524 return iCopied;
5525 }
5526
5527
sphHasExpressions(const CSphQuery & tQuery,const CSphSchema & tSchema)5528 bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
5529 {
5530 ARRAY_FOREACH ( i, tQuery.m_dItems )
5531 {
5532 const CSphQueryItem & tItem = tQuery.m_dItems[i];
5533 const CSphString & sExpr = tItem.m_sExpr;
5534
5535 // all expressions that come from parser are automatically aliased
5536 assert ( !tItem.m_sAlias.IsEmpty() );
5537
5538 if ( !( sExpr=="*"
5539 || ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && tItem.m_sAlias==sExpr )
5540 || IsGroupbyMagic ( sExpr ) ) )
5541 return true;
5542 }
5543
5544 return false;
5545 }
5546
5547
5548 //
5549 // $Id$
5550 //
5551
5552