1 /*
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's offical duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: kmercounts.cpp
29 
30 Author: Greg Boratyn
31 
32 Contents: Implementation of k-mer counting classes
33 
34 ******************************************************************************/
35 
36 
37 #include <ncbi_pch.hpp>
38 
39 #include <math.h>
40 
41 #include <corelib/ncbistre.hpp>
42 #include <corelib/ncbi_limits.hpp>
43 #include <corelib/ncbiexpt.hpp>
44 
45 #include <objmgr/seq_vector.hpp>
46 
47 #include <algo/cobalt/kmercounts.hpp>
48 
49 
50 USING_NCBI_SCOPE;
51 USING_SCOPE(cobalt);
52 
53 // Default values for default params
54 unsigned int CSparseKmerCounts::sm_KmerLength = 4;
55 unsigned int CSparseKmerCounts::sm_AlphabetSize = kAlphabetSize;
56 vector<Uint1> CSparseKmerCounts::sm_TransTable;
57 bool CSparseKmerCounts::sm_UseCompressed = false;
58 CSparseKmerCounts::TCount* CSparseKmerCounts::sm_Buffer = NULL;
59 bool CSparseKmerCounts::sm_ForceSmallerMem = false;
60 
61 unsigned int CBinaryKmerCounts::sm_KmerLength = 3;
62 unsigned int CBinaryKmerCounts::sm_AlphabetSize = kAlphabetSize;
63 vector<Uint1> CBinaryKmerCounts::sm_TransTable;
64 bool CBinaryKmerCounts::sm_UseCompressed = false;
65 
66 static const Uint1 kXaa = 21;
67 
68 
CSparseKmerCounts(const objects::CSeq_loc & seq,objects::CScope & scope)69 CSparseKmerCounts::CSparseKmerCounts(const objects::CSeq_loc& seq,
70                                      objects::CScope& scope)
71 {
72     Reset(seq, scope);
73 }
74 
75 // Initialize position bit vector for k-mer counting
76 // @param sv Sequence [in]
77 // @param pos Index of the k-mer count [out]
78 // @param index Index of letter in sequence [in|out]
79 // @param num_bits Number of bits per letter in pos [in]
80 // @param kmer_len K-mer length [in]
81 // @return True if a valid k-mer was found, false otherwise
InitPosBits(const objects::CSeqVector & sv,Uint4 & pos,unsigned int & index,Uint4 num_bits,Uint4 kmer_len)82 bool CSparseKmerCounts::InitPosBits(const objects::CSeqVector& sv, Uint4& pos,
83                                      unsigned int& index,
84                                      Uint4 num_bits, Uint4 kmer_len)
85 
86 {
87     pos = 0;
88     unsigned i = 0;
89     while (index + kmer_len - 1 < sv.size() && i < kmer_len) {
90 
91         // Skip kmers that contain X (unspecified aa)
92         if (sv[index + i] == kXaa) {
93             index += i + 1;
94 
95             pos = 0;
96             i = 0;
97             continue;
98         }
99         pos |= GetAALetter(sv[index + i]) << (num_bits * (kmer_len - i - 1));
100         i++;
101     }
102 
103     if (i < kmer_len) {
104         return false;
105     }
106 
107     index += i;
108     return true;
109 }
110 
111 
MarkUsed(Uint4 pos,vector<Uint4> & entries,int chunk)112 static void MarkUsed(Uint4 pos, vector<Uint4>& entries, int chunk)
113 {
114     int index = pos / chunk;
115     int offset = pos - index * chunk;
116     Uint4 mask = 0x80000000 >> offset;
117     entries[index] |= mask;
118 }
119 
120 
ReserveCountsMem(unsigned int num_bits)121 CSparseKmerCounts::TCount* CSparseKmerCounts::ReserveCountsMem(
122                                                        unsigned int num_bits)
123 {
124     Uint4 num_elements;
125     TCount* counts = NULL;
126 
127     // Reserve memory for storing counts
128     // there are two methods for indexing counts (see the Reset() method)
129     // if memory cannot be allocated try to allocate for the second method
130     // that requires less memory
131     if (!sm_ForceSmallerMem && sm_KmerLength * num_bits
132                                   < kLengthBitsThreshold) {
133 
134         num_elements = 1 << (num_bits * sm_KmerLength);
135         try {
136             counts = new TCount[num_elements];
137         }
138         catch (std::bad_alloc) {
139             sm_ForceSmallerMem = true;
140             num_elements = (Uint4)pow((double)sm_AlphabetSize,
141                                       (double)sm_KmerLength);
142 
143             try {
144                 counts = new TCount[num_elements];
145             }
146             catch (std::bad_alloc) {
147                 NCBI_THROW(CKmerCountsException, eMemoryAllocation,
148                            "Memory cannot be allocated for k-mer counting."
149                            " Try using compressed alphabet or smaller k.");
150             }
151 
152         }
153     }
154     return counts;
155 }
156 
Reset(const objects::CSeq_loc & seq,objects::CScope & scope)157 void CSparseKmerCounts::Reset(const objects::CSeq_loc& seq,
158                               objects::CScope& scope)
159 {
160     unsigned int kmer_len = sm_KmerLength;
161     unsigned int alphabet_size = sm_AlphabetSize;
162 
163     _ASSERT(kmer_len > 0 && alphabet_size > 0);
164 
165     if (sm_UseCompressed && sm_TransTable.empty()) {
166         NCBI_THROW(CKmerCountsException, eInvalidOptions,
167                    "Compressed alphabet selected, but translation table not"
168                    " specified");
169     }
170 
171     if (!seq.IsWhole() && !seq.IsInt()) {
172         NCBI_THROW(CKmerCountsException, eUnsupportedSeqLoc,
173                    "Unsupported SeqLoc encountered");
174     }
175 
176     _ASSERT(seq.GetId());
177     objects::CSeqVector sv = scope.GetBioseqHandle(*seq.GetId()).GetSeqVector();
178 
179     unsigned int num_elements;
180     unsigned int seq_len = sv.size();
181 
182     m_SeqLength = sv.size();
183     m_Counts.clear();
184     m_NumCounts = 0;
185 
186     if (m_SeqLength < kmer_len) {
187         NCBI_THROW(CKmerCountsException, eBadSequence,
188                    "Sequence shorter than desired k-mer length");
189     }
190 
191     // Compute number of bits needed to represent all letters
192     unsigned int mask = 1;
193     int num = 0;
194     while (alphabet_size > mask) {
195         mask <<= 1;
196         num++;
197     }
198     const int kNumBits = num;
199 
200     TCount* counts = sm_Buffer ? sm_Buffer : ReserveCountsMem(kNumBits);
201 
202     // Vecotr of counts is first computed using regular vector that is later
203     // converted to the sparse vector (list of position-value pairs).
204     // Positions are calculated as binary representations of k-mers, if they
205     // fit in 32 bits. Otherwise as numbers in system with base alphabet size.
206     if (!sm_ForceSmallerMem && kmer_len * kNumBits < kLengthBitsThreshold) {
207 
208         num_elements = 1 << (kNumBits * kmer_len);
209         const Uint4 kMask = num_elements - (1 << kNumBits);
210 
211         _ASSERT(counts);
212         memset(counts, 0, num_elements * sizeof(TCount));
213 
214         const int kBitChunk = sizeof(Uint4) * 8;
215 
216         // Vector indicating non-zero elements
217         vector<Uint4> used_entries(num_elements / kBitChunk + 1);
218 
219         //first k-mer
220         Uint4 i = 0;
221         Uint4 pos;
222         bool is_pos = InitPosBits(sv, pos, i, kNumBits, kmer_len);
223         if (is_pos) {
224             _ASSERT(pos < num_elements);
225             counts[pos]++;
226             MarkUsed(pos, used_entries, kBitChunk);
227             m_NumCounts++;
228 
229             //for each next kmer
230             for (;i < seq_len && is_pos;i++) {
231 
232                 if (GetAALetter(sv[i]) >= alphabet_size) {
233                     NCBI_THROW(CKmerCountsException, eBadSequence,
234                                "Letter out of alphabet in sequnece");
235                 }
236 
237                 // Kmers that contain unspecified amino acid X are not
238                 // considered
239                 if (sv[i] == kXaa) {
240                     i++;
241                     is_pos = InitPosBits(sv, pos, i, kNumBits, kmer_len);
242 
243                     if (i >= seq_len || !is_pos) {
244                         break;
245                     }
246                 }
247 
248                 pos <<= kNumBits;
249                 pos &= kMask;
250                 pos |= GetAALetter(sv[i]);
251                 _ASSERT(pos < num_elements);
252                 counts[pos]++;
253                 MarkUsed(pos, used_entries, kBitChunk);
254                 m_NumCounts++;
255             }
256         }
257 
258         // Convert to sparse vector
259         m_Counts.reserve(m_SeqLength - kmer_len + 1);
260         size_t ind = 0;
261         Uint4 num_bit_chunks = num_elements / kBitChunk + 1;
262         while (ind < num_elements / kBitChunk + 1) {
263 
264             // find next chunk with at least one non-zero count
265             while (ind < num_bit_chunks && used_entries[ind] == 0) {
266                 ind++;
267             }
268 
269             if (ind == num_bit_chunks) {
270                 break;
271             }
272 
273             // find the set bit and get position in the counts vector
274             for (Uint4 mask=0x80000000,j=0;used_entries[ind] != 0;
275                  j++, mask>>=1) {
276                 _ASSERT(j < 32);
277                 if ((used_entries[ind] & mask) != 0) {
278                     pos = ind * kBitChunk + j;
279 
280                     _ASSERT(counts[pos] > 0);
281                     m_Counts.push_back(SVectorElement(pos, counts[pos]));
282 
283                     used_entries[ind] ^= mask;
284                 }
285             }
286             ind++;
287         }
288 
289     }
290     else {
291         _ASSERT(pow((double)alphabet_size, (double)kmer_len)
292                 < numeric_limits<Uint4>::max());
293 
294         AutoArray<double> base(kmer_len);
295         for (Uint4 i=0;i < kmer_len;i++) {
296             base[i] = pow((double)alphabet_size, (double)i);
297         }
298 
299         num_elements = (Uint4)pow((double)alphabet_size, (double)kmer_len);
300 
301         _ASSERT(counts);
302         memset(counts, 0, num_elements * sizeof(TCount));
303 
304         // Vector indicating non-zero elements
305         const int kBitChunk = sizeof(Uint4) * 8;
306         vector<Uint4> used_entries(num_elements / kBitChunk + 1);
307 
308         Uint4 pos;
309         for (unsigned i=0;i < seq_len - kmer_len + 1;i++) {
310 
311             // Kmers that contain unspecified amino acid X are not considered
312             if (sv[i + kmer_len - 1] == kXaa) {
313                 i += kmer_len - 1;
314                 continue;
315             }
316 
317             pos = GetAALetter(sv[i]) - 1;
318             _ASSERT(GetAALetter(sv[i]) <= alphabet_size);
319             for (Uint4 j=1;j < kmer_len;j++) {
320                 pos += (Uint4)(((double)GetAALetter(sv[i + j]) - 1) * base[j]);
321                 _ASSERT(GetAALetter(sv[i + j]) <= alphabet_size);
322             }
323             counts[pos]++;
324             MarkUsed(pos, used_entries, kBitChunk);
325             m_NumCounts++;
326         }
327 
328         // Convert to sparse vector
329         m_Counts.reserve(m_SeqLength - kmer_len + 1);
330         size_t ind = 0;
331         Uint4 num_bit_chunks = num_elements / kBitChunk + 1;
332         while (ind < num_elements / kBitChunk + 1) {
333 
334             // find next chunk with at least one non-zero count
335             while (ind < num_bit_chunks && used_entries[ind] == 0) {
336                 ind++;
337             }
338 
339             if (ind == num_bit_chunks) {
340                 break;
341             }
342 
343             // find the set bit and get position in the counts vector
344             for (Uint4 mask=0x80000000,j=0;used_entries[ind] != 0;
345                  j++, mask>>=1) {
346                 _ASSERT(j < 32);
347                 if ((used_entries[ind] & mask) != 0) {
348                     pos = ind * kBitChunk + j;
349 
350                     _ASSERT(counts[pos] > 0);
351                     m_Counts.push_back(SVectorElement(pos, counts[pos]));
352 
353                     used_entries[ind] ^= mask;
354                 }
355             }
356             ind++;
357         }
358     }
359 
360     if (!sm_Buffer && counts) {
361         delete [] counts;
362     }
363 
364 }
365 
FractionCommonKmersDist(const CSparseKmerCounts & v1,const CSparseKmerCounts & v2)366 double CSparseKmerCounts::FractionCommonKmersDist(
367                                                   const CSparseKmerCounts& v1,
368                                                   const CSparseKmerCounts& v2)
369 {
370     _ASSERT(GetKmerLength() > 0);
371 
372     unsigned int num_common = CountCommonKmers(v1, v2, true);
373 
374     unsigned int num_counts1 = v1.GetNumCounts();
375     unsigned int num_counts2 = v2.GetNumCounts();
376     unsigned int fewer_counts
377         =  num_counts1 < num_counts2 ? num_counts1 : num_counts2;
378 
379     // In RC Edgar, BMC Bioinformatics 5:113, 2004 the denominator is
380     // SeqLen - k + 1 that is equal to number of counts only if sequence
381     // does not contain Xaa.
382     return 1.0 - (double)num_common / (double)fewer_counts;
383 }
384 
385 
FractionCommonKmersGlobalDist(const CSparseKmerCounts & v1,const CSparseKmerCounts & v2)386 double CSparseKmerCounts::FractionCommonKmersGlobalDist(
387                                                   const CSparseKmerCounts& v1,
388                                                   const CSparseKmerCounts& v2)
389 {
390     _ASSERT(GetKmerLength() > 0);
391 
392     unsigned int num_common = CountCommonKmers(v1, v2, true);
393 
394     unsigned int num_counts1 = v1.GetNumCounts();
395     unsigned int num_counts2 = v2.GetNumCounts();
396     unsigned int more_counts
397         = num_counts1 > num_counts2 ? num_counts1 : num_counts2;
398 
399     // In RC Edgar, BMC Bioinformatics 5:113, 2004 the denominator is
400     // SeqLen - k + 1 that is equal to number of counts only if sequence
401     // does not contain Xaa.
402     return 1.0 - (double)num_common / (double)more_counts;
403 }
404 
405 
CountCommonKmers(const CSparseKmerCounts & vect1,const CSparseKmerCounts & vect2,bool repetitions)406 unsigned int CSparseKmerCounts::CountCommonKmers(
407                                               const CSparseKmerCounts& vect1,
408                                               const CSparseKmerCounts& vect2,
409                                               bool repetitions)
410 
411 {
412 
413     unsigned int result = 0;
414     TNonZeroCounts_CI it1 = vect1.m_Counts.begin();
415     TNonZeroCounts_CI it2 = vect2.m_Counts.begin();
416 
417     // Iterating through non zero counts in both vectors
418     do {
419         // For each vector element that is non zero in vect1 and vect2
420         while (it1 != vect1.m_Counts.end() && it2 != vect2.m_Counts.end()
421                && it1->position == it2->position) {
422 
423             // Increase number of common kmers found
424             if (repetitions) {
425                 result += (unsigned)(it1->value < it2->value
426                                      ? it1->value : it2->value);
427             }
428             else {
429                 result++;
430             }
431             ++it1;
432             ++it2;
433         }
434 
435         //Finding the next pair of non-zero element in both vect1 and vect2
436 
437         while (it1 != vect1.m_Counts.end() && it2 != vect2.m_Counts.end()
438                && it1->position < it2->position) {
439             ++it1;
440         }
441 
442         while (it1 != vect1.m_Counts.end() && it2 != vect2.m_Counts.end()
443                && it2->position < it1->position) {
444             ++it2;
445         }
446 
447 
448     } while (it1 != vect1.m_Counts.end() && it2 != vect2.m_Counts.end());
449 
450     return result;
451 }
452 
453 
PreCount(void)454 void CSparseKmerCounts::PreCount(void)
455 {
456     // Reserve memory for storing counts of all possible k-mers
457     // compute number of bits needed to represent all letters
458     unsigned int mask = 1;
459     int num_bits = 0;
460     while (sm_AlphabetSize > mask) {
461         mask <<= 1;
462         num_bits++;
463     }
464 
465     sm_Buffer = ReserveCountsMem(num_bits);
466 }
467 
PostCount(void)468 void CSparseKmerCounts::PostCount(void)
469 {
470     if (sm_Buffer) {
471         delete [] sm_Buffer;
472     }
473     sm_Buffer = NULL;
474     sm_ForceSmallerMem = false;
475 }
476 
477 
Print(CNcbiOstream & ostr) const478 CNcbiOstream& CSparseKmerCounts::Print(CNcbiOstream& ostr) const
479 {
480     for (CSparseKmerCounts::TNonZeroCounts_CI it=BeginNonZero();
481          it != EndNonZero();++it) {
482         ostr << it->position << ":" << (int)it->value << " ";
483     }
484     ostr << NcbiEndl;
485 
486     return ostr;
487 }
488 
489 
CBinaryKmerCounts(const objects::CSeq_loc & seq,objects::CScope & scope)490 CBinaryKmerCounts::CBinaryKmerCounts(const objects::CSeq_loc& seq,
491                                      objects::CScope& scope)
492 {
493     Reset(seq, scope);
494 }
495 
496 
Reset(const objects::CSeq_loc & seq,objects::CScope & scope)497 void CBinaryKmerCounts::Reset(const objects::CSeq_loc& seq,
498                               objects::CScope& scope)
499 {
500     unsigned int kmer_len = sm_KmerLength;
501     unsigned int alphabet_size = sm_AlphabetSize;
502 
503     _ASSERT(kmer_len > 0 && alphabet_size > 0);
504 
505     if (sm_UseCompressed && sm_TransTable.empty()) {
506         NCBI_THROW(CKmerCountsException, eInvalidOptions,
507                    "Compressed alphabet selected, but translation table not"
508                    " specified");
509     }
510 
511     if (!seq.IsWhole() && !seq.IsInt()) {
512         NCBI_THROW(CKmerCountsException, eUnsupportedSeqLoc,
513                    "Unsupported SeqLoc encountered");
514     }
515 
516     _ASSERT(seq.GetId());
517     objects::CSeqVector sv = scope.GetBioseqHandle(*seq.GetId()).GetSeqVector();
518 
519     unsigned int num_elements;
520     unsigned int seq_len = sv.size();
521 
522     m_SeqLength = sv.size();
523     m_Counts.clear();
524     m_NumCounts = 0;
525 
526     if (m_SeqLength < kmer_len) {
527         NCBI_THROW(CKmerCountsException, eBadSequence,
528                    "Sequence shorter than desired k-mer length");
529     }
530 
531     const int kBitChunk = sizeof(Uint4) * 8;
532 
533     // Vecotr of counts is first computed using regular vector that is later
534     // converted to the sparse vector (list of position-value pairs).
535     // Positions are calculated as binary representations of k-mers, if they
536     // fit in 32 bits. Otherwise as numbers in system with base alphabet size.
537 
538     _ASSERT(pow((double)alphabet_size, (double)kmer_len)
539             < numeric_limits<Uint4>::max());
540 
541     AutoArray<double> base(kmer_len);
542     for (Uint4 i=0;i < kmer_len;i++) {
543         base[i] = pow((double)alphabet_size, (double)i);
544     }
545 
546     num_elements = (Uint4)pow((double)alphabet_size, (double)kmer_len);
547 
548     m_Counts.resize(num_elements / 32 + 1, (Uint4)0);
549 
550     Uint4 pos;
551     unsigned i = 0;
552 
553     // find the first k-mer that does not contain Xaa
554     bool is_xaa;
555     do {
556         is_xaa = false;
557         for (unsigned j=0;j < kmer_len && i < seq_len - kmer_len + 1;j++) {
558 
559             if (sv[i + j] == kXaa) {
560                 i += kmer_len;
561                 is_xaa = true;
562                 break;
563             }
564         }
565     } while (i < seq_len - kmer_len + 1 && is_xaa);
566     // if sequences contains only Xaa's then exit
567     if (i >= seq_len - kmer_len + 1) {
568         return;
569     }
570 
571     // for each subsequence of kmer_len residues
572     for (;i < seq_len - kmer_len + 1;i++) {
573 
574         // k-mers that contain unspecified amino acid X are not considered
575         if (sv[i + kmer_len - 1] == kXaa) {
576 
577             // move k-mer window past Xaa
578             i += kmer_len;
579 
580             // find first k-mer that does not contain Xaa
581             do {
582                 is_xaa = false;
583                 for (unsigned j=0;j < kmer_len && i < seq_len - kmer_len + 1;
584                      j++) {
585 
586                     if (sv[i + j] == kXaa) {
587                         i += kmer_len;
588                         is_xaa = true;
589                         break;
590                     }
591                 }
592             } while (i < seq_len - kmer_len + 1 && is_xaa);
593 
594             // if Xaa are found till the end of sequence exit
595             if (i >= seq_len - kmer_len + 1) {
596                 break;
597             }
598         }
599 
600         pos = GetAALetter(sv[i]) - 1;
601         _ASSERT(GetAALetter(sv[i]) <= alphabet_size);
602         for (Uint4 j=1;j < kmer_len;j++) {
603             pos += (Uint4)(((double)GetAALetter(sv[i + j]) - 1) * base[j]);
604             _ASSERT(GetAALetter(sv[i + j]) <= alphabet_size);
605         }
606         MarkUsed(pos, m_Counts, kBitChunk);
607     }
608 
609     m_NumCounts = 0;
610     for (size_t i=0;i < m_Counts.size();i++) {
611         m_NumCounts += x_Popcount(m_Counts[i]);
612     }
613 }
614 
615 
FractionCommonKmersDist(const CBinaryKmerCounts & v1,const CBinaryKmerCounts & v2)616 double CBinaryKmerCounts::FractionCommonKmersDist(const CBinaryKmerCounts& v1,
617                                                   const CBinaryKmerCounts& v2)
618 {
619     _ASSERT(GetKmerLength() > 0);
620 
621     unsigned int num_common = CountCommonKmers(v1, v2);
622 
623     unsigned int num_counts1 = v1.GetNumCounts();
624     unsigned int num_counts2 = v2.GetNumCounts();
625     unsigned int fewer_counts
626         =  num_counts1 < num_counts2 ? num_counts1 : num_counts2;
627 
628     // In RC Edgar, BMC Bioinformatics 5:113, 2004 the denominator is
629     // SeqLen - k + 1 that is equal to number of counts only if sequence
630     // does not contain Xaa.
631     return 1.0 - (double)num_common / (double)fewer_counts;
632 }
633 
634 
FractionCommonKmersGlobalDist(const CBinaryKmerCounts & v1,const CBinaryKmerCounts & v2)635 double CBinaryKmerCounts::FractionCommonKmersGlobalDist(
636                                                   const CBinaryKmerCounts& v1,
637                                                   const CBinaryKmerCounts& v2)
638 {
639     _ASSERT(GetKmerLength() > 0);
640 
641     unsigned int num_common = CountCommonKmers(v1, v2);
642 
643     unsigned int num_counts1 = v1.GetNumCounts();
644     unsigned int num_counts2 = v2.GetNumCounts();
645     unsigned int more_counts
646         = num_counts1 > num_counts2 ? num_counts1 : num_counts2;
647 
648     // In RC Edgar, BMC Bioinformatics 5:113, 2004 the denominator is
649     // SeqLen - k + 1 that is equal to number of counts only if sequence
650     // does not contain Xaa.
651     return 1.0 - (double)num_common / (double)more_counts;
652 }
653 
654 
CountCommonKmers(const CBinaryKmerCounts & vect1,const CBinaryKmerCounts & vect2)655 unsigned int CBinaryKmerCounts::CountCommonKmers(
656                                               const CBinaryKmerCounts& vect1,
657                                               const CBinaryKmerCounts& vect2)
658 {
659     unsigned int result = 0;
660     const Uint4* counts1 = &vect1.m_Counts[0];
661     const Uint4* counts2 = &vect2.m_Counts[0];
662     int size = vect1.m_Counts.size();
663 
664     for (int i=0;i < size;i++) {
665         result += x_Popcount(counts1[i] & counts2[i]);
666     }
667 
668     return result;
669 }
670