1 /*
2 
3   VSEARCH: a versatile open source tool for metagenomics
4 
5   Copyright (C) 2014-2021, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
6   All rights reserved.
7 
8   Contact: Torbjorn Rognes <torognes@ifi.uio.no>,
9   Department of Informatics, University of Oslo,
10   PO Box 1080 Blindern, NO-0316 Oslo, Norway
11 
12   This software is dual-licensed and available under a choice
13   of one of two licenses, either under the terms of the GNU
14   General Public License version 3 or the BSD 2-Clause License.
15 
16 
17   GNU General Public License version 3
18 
19   This program is free software: you can redistribute it and/or modify
20   it under the terms of the GNU General Public License as published by
21   the Free Software Foundation, either version 3 of the License, or
22   (at your option) any later version.
23 
24   This program is distributed in the hope that it will be useful,
25   but WITHOUT ANY WARRANTY; without even the implied warranty of
26   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27   GNU General Public License for more details.
28 
29   You should have received a copy of the GNU General Public License
30   along with this program.  If not, see <http://www.gnu.org/licenses/>.
31 
32 
33   The BSD 2-Clause License
34 
35   Redistribution and use in source and binary forms, with or without
36   modification, are permitted provided that the following conditions
37   are met:
38 
39   1. Redistributions of source code must retain the above copyright
40   notice, this list of conditions and the following disclaimer.
41 
42   2. Redistributions in binary form must reproduce the above copyright
43   notice, this list of conditions and the following disclaimer in the
44   documentation and/or other materials provided with the distribution.
45 
46   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
49   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
50   COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
51   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
52   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
54   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
56   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57   POSSIBILITY OF SUCH DAMAGE.
58 
59 */
60 
61 #include "vsearch.h"
62 
63 /* per thread data */
64 
hit_compare_byid_typed(struct hit * x,struct hit * y)65 inline int hit_compare_byid_typed(struct hit * x, struct hit * y)
66 {
67   // high id, then low id
68   // early target, then late target
69 
70   if (x->rejected < y->rejected) {
71     return -1;
72   } else
73     if (x->rejected > y->rejected) {
74       return +1;
75     } else
76       if (x->rejected == 1) {
77         return 0;
78       } else
79         if (x->aligned > y->aligned) {
80           return -1;
81         } else
82           if (x->aligned < y->aligned) {
83             return +1;
84           } else
85             if (x->aligned == 0) {
86               return 0;
87             } else
88               if (x->id > y->id) {
89                 return -1;
90               } else
91                 if (x->id < y->id) {
92                   return +1;
93                 } else
94                   if (x->target < y->target) {
95                     return -1;
96                   } else
97                     if (x->target > y->target) {
98                       return +1;
99                     } else {
100                       return 0;
101 
102         }
103 }
104 
hit_compare_bysize_typed(struct hit * x,struct hit * y)105 inline int hit_compare_bysize_typed(struct hit * x, struct hit * y)
106 {
107   // high abundance, then low abundance
108   // high id, then low id
109   // early target, then late target
110 
111   if (x->rejected < y->rejected) {
112     return -1;
113   } else
114     if (x->rejected > y->rejected) {
115       return +1;
116     } else
117       if (x->rejected == 1) {
118         return 0;
119       } else
120         if (x->aligned > y->aligned) {
121           return -1;
122         } else
123           if (x->aligned < y->aligned) {
124             return +1;
125           } else
126             if (x->aligned == 0) {
127               return 0;
128             } else
129               if (db_getabundance(x->target) > db_getabundance(y->target)) {
130                 return -1;
131               } else
132                 if (db_getabundance(x->target) < db_getabundance(y->target)) {
133                   return +1;
134                 } else
135                   if (x->id > y->id) {
136                     return -1;
137                   } else
138                     if (x->id < y->id) {
139                       return +1;
140                     } else
141                       if (x->target < y->target) {
142                         return -1;
143                       } else
144                         if (x->target > y->target) {
145                           return +1;
146                         } else {
147                           return 0;
148 
149         }
150 }
151 
hit_compare_byid(const void * a,const void * b)152 int hit_compare_byid(const void * a, const void * b)
153 {
154   return hit_compare_byid_typed((struct hit *) a, (struct hit *) b);
155 }
156 
hit_compare_bysize(const void * a,const void * b)157 int hit_compare_bysize(const void * a, const void * b)
158 {
159   return hit_compare_bysize_typed((struct hit *) a, (struct hit *) b);
160 }
161 
search_enough_kmers(struct searchinfo_s * si,unsigned int count)162 bool search_enough_kmers(struct searchinfo_s * si,
163                          unsigned int count)
164 {
165   return (count >= opt_minwordmatches) || (count >= si->kmersamplecount);
166 }
167 
search_topscores(struct searchinfo_s * si)168 void search_topscores(struct searchinfo_s * si)
169 {
170   /*
171     Count the kmer hits in each database sequence and
172     make a sorted list of a given number (th)
173     of the database sequences with the highest number of matching kmers.
174     These are stored in the min heap array.
175   */
176 
177   /* count kmer hits in the database sequences */
178   int indexed_count = dbindex_getcount();
179 
180   /* zero counts */
181   memset(si->kmers, 0, indexed_count * sizeof(count_t));
182 
183   minheap_empty(si->m);
184 
185   for(unsigned int i=0; i<si->kmersamplecount; i++)
186     {
187       unsigned int kmer = si->kmersample[i];
188       unsigned char * bitmap = dbindex_getbitmap(kmer);
189 
190       if (bitmap)
191         {
192 #ifdef __x86_64__
193           if (ssse3_present) {
194             increment_counters_from_bitmap_ssse3(si->kmers,
195                                                  bitmap, indexed_count);
196           } else {
197             increment_counters_from_bitmap_sse2(si->kmers,
198                                                 bitmap, indexed_count);
199 
200         }
201 #else
202           increment_counters_from_bitmap(si->kmers, bitmap, indexed_count);
203 #endif
204         }
205       else
206         {
207           unsigned int * list = dbindex_getmatchlist(kmer);
208           unsigned int count = dbindex_getmatchcount(kmer);
209           for(unsigned int j=0; j < count; j++) {
210             si->kmers[list[j]]++;
211 
212         }
213         }
214     }
215 
216   int minmatches = MIN(opt_minwordmatches, si->kmersamplecount);
217 
218   for(int i=0; i < indexed_count; i++)
219     {
220       count_t count = si->kmers[i];
221       if (count >= minmatches)
222         {
223           unsigned int seqno = dbindex_getmapping(i);
224           unsigned int length = db_getsequencelen(seqno);
225 
226           elem_t novel;
227           novel.count = count;
228           novel.seqno = seqno;
229           novel.length = length;
230 
231           minheap_add(si->m, & novel);
232         }
233     }
234 
235   minheap_sort(si->m);
236 }
237 
seqncmp(char * a,char * b,uint64_t n)238 int seqncmp(char * a, char * b, uint64_t n)
239 {
240   for(unsigned int i = 0; i<n; i++)
241     {
242       int x = chrmap_4bit[(int)(a[i])];
243       int y = chrmap_4bit[(int)(b[i])];
244       if (x < y) {
245         return -1;
246       } else if (x > y) {
247         return +1;
248 
249         }
250     }
251   return 0;
252 }
253 
254 
align_trim(struct hit * hit)255 void align_trim(struct hit * hit)
256 {
257   /* trim alignment and fill in info */
258   /* assumes that the hit has been aligned */
259 
260   /* info for semi-global alignment (without gaps at ends) */
261 
262   hit->trim_aln_left = 0;
263   hit->trim_q_left = 0;
264   hit->trim_t_left = 0;
265   hit->trim_aln_right = 0;
266   hit->trim_q_right = 0;
267   hit->trim_t_right = 0;
268 
269   /* left trim alignment */
270 
271   char * p = hit->nwalignment;
272   char op;
273   int64_t run;
274   if (*p)
275     {
276       run = 1;
277       int scanlength = 0;
278       sscanf(p, "%" PRId64 "%n", &run, &scanlength);
279       op = *(p+scanlength);
280       if (op != 'M')
281         {
282           hit->trim_aln_left = 1 + scanlength;
283           if (op == 'D') {
284             hit->trim_q_left = run;
285           } else {
286             hit->trim_t_left = run;
287 
288         }
289         }
290     }
291 
292   /* right trim alignment */
293 
294   char * e = hit->nwalignment + strlen(hit->nwalignment);
295   if (e > hit->nwalignment)
296     {
297       p = e - 1;
298       op = *p;
299       if (op != 'M')
300         {
301           while ((p > hit->nwalignment) && (*(p-1) <= '9')) {
302             p--;
303 
304         }
305           run = 1;
306           sscanf(p, "%" PRId64, &run);
307           hit->trim_aln_right = e - p;
308           if (op == 'D') {
309             hit->trim_q_right = run;
310           } else {
311             hit->trim_t_right = run;
312 
313         }
314         }
315     }
316 
317   if (hit->trim_q_left >= hit->nwalignmentlength) {
318     hit->trim_q_right = 0;
319 
320         }
321 
322   if (hit->trim_t_left >= hit->nwalignmentlength) {
323     hit->trim_t_right = 0;
324 
325         }
326 
327   hit->internal_alignmentlength = hit->nwalignmentlength
328     - hit->trim_q_left - hit->trim_t_left
329     - hit->trim_q_right - hit->trim_t_right;
330 
331   hit->internal_indels = hit->nwindels
332     - hit->trim_q_left - hit->trim_t_left
333     - hit->trim_q_right - hit->trim_t_right;
334 
335   hit->internal_gaps = hit->nwgaps
336     - ((hit->trim_q_left  + hit->trim_t_left)  > 0 ? 1 : 0)
337     - ((hit->trim_q_right + hit->trim_t_right) > 0 ? 1 : 0);
338 
339   /* CD-HIT */
340   hit->id0 = hit->shortest > 0 ? 100.0 * hit->matches / hit->shortest : 0.0;
341   /* all diffs */
342   hit->id1 = hit->nwalignmentlength > 0 ?
343     100.0 * hit->matches / hit->nwalignmentlength : 0.0;
344   /* internal diffs */
345   hit->id2 = hit->internal_alignmentlength > 0 ?
346     100.0 * hit->matches / hit->internal_alignmentlength : 0.0;
347   /* Marine Biology Lab */
348   hit->id3 = MAX(0.0, 100.0 * (1.0 - (1.0 * (hit->mismatches + hit->nwgaps) /
349                                       hit->longest)));
350   /* BLAST */
351   hit->id4 = hit->nwalignmentlength > 0 ?
352     100.0 * hit->matches / hit->nwalignmentlength : 0.0;
353 
354   switch (opt_iddef)
355     {
356     case 0:
357       hit->id = hit->id0;
358       break;
359     case 1:
360       hit->id = hit->id1;
361       break;
362     case 2:
363       hit->id = hit->id2;
364       break;
365     case 3:
366       hit->id = hit->id3;
367       break;
368     case 4:
369       hit->id = hit->id4;
370       break;
371     }
372 }
373 
search_acceptable_unaligned(struct searchinfo_s * si,int target)374 int search_acceptable_unaligned(struct searchinfo_s * si,
375                                 int target)
376 {
377   /* consider whether a hit satisfy accept criteria before alignment */
378 
379   char * qseq = si->qsequence;
380   char * dlabel = db_getheader(target);
381   char * dseq = db_getsequence(target);
382   int64_t dseqlen = db_getsequencelen(target);
383   int64_t tsize = db_getabundance(target);
384 
385   if (
386       /* maxqsize */
387       (si->qsize <= opt_maxqsize)
388       &&
389       /* mintsize */
390       (tsize >= opt_mintsize)
391       &&
392       /* minsizeratio */
393       (si->qsize >= opt_minsizeratio * tsize)
394       &&
395       /* maxsizeratio */
396       (si->qsize <= opt_maxsizeratio * tsize)
397       &&
398       /* minqt */
399       (si->qseqlen >= opt_minqt * dseqlen)
400       &&
401       /* maxqt */
402       (si->qseqlen <= opt_maxqt * dseqlen)
403       &&
404       /* minsl */
405       (si->qseqlen < dseqlen ?
406        si->qseqlen >= opt_minsl * dseqlen :
407        dseqlen >= opt_minsl * si->qseqlen)
408       &&
409       /* maxsl */
410       (si->qseqlen < dseqlen ?
411        si->qseqlen <= opt_maxsl * dseqlen :
412        dseqlen <= opt_maxsl * si->qseqlen)
413       &&
414       /* idprefix */
415       ((si->qseqlen >= opt_idprefix) &&
416        (dseqlen >= opt_idprefix) &&
417        (!seqncmp(qseq, dseq, opt_idprefix)))
418       &&
419       /* idsuffix */
420       ((si->qseqlen >= opt_idsuffix) &&
421        (dseqlen >= opt_idsuffix) &&
422        (!seqncmp(qseq+si->qseqlen-opt_idsuffix,
423                 dseq+dseqlen-opt_idsuffix,
424                 opt_idsuffix)))
425       &&
426       /* self */
427       ((!opt_self) || (strcmp(si->query_head, dlabel)))
428       &&
429       /* selfid */
430       ((!opt_selfid) ||
431        (si->qseqlen != dseqlen) ||
432        (seqncmp(qseq, dseq, si->qseqlen)))
433       )
434     {
435       /* needs further consideration */
436       return 1;
437     }
438   else
439     {
440       /* reject */
441       return 0;
442     }
443 }
444 
search_acceptable_aligned(struct searchinfo_s * si,struct hit * hit)445 int search_acceptable_aligned(struct searchinfo_s * si,
446                               struct hit * hit)
447 {
448   if (/* weak_id */
449       (hit->id >= 100.0 * opt_weak_id) &&
450       /* maxsubs */
451       (hit->mismatches <= opt_maxsubs) &&
452       /* maxgaps */
453       (hit->internal_gaps <= opt_maxgaps) &&
454       /* mincols */
455       (hit->internal_alignmentlength >= opt_mincols) &&
456       /* leftjust */
457       ((!opt_leftjust) || (hit->trim_q_left +
458                            hit->trim_t_left == 0)) &&
459       /* rightjust */
460       ((!opt_rightjust) || (hit->trim_q_right +
461                             hit->trim_t_right == 0)) &&
462       /* query_cov */
463       (hit->matches + hit->mismatches >= opt_query_cov * si->qseqlen) &&
464       /* target_cov */
465       (hit->matches + hit->mismatches >=
466        opt_target_cov * db_getsequencelen(hit->target)) &&
467       /* maxid */
468       (hit->id <= 100.0 * opt_maxid) &&
469       /* mid */
470       (100.0 * hit->matches / (hit->matches + hit->mismatches) >= opt_mid) &&
471       /* maxdiffs */
472       (hit->mismatches + hit->internal_indels <= opt_maxdiffs))
473     {
474       if (opt_cluster_unoise)
475         {
476           int d = hit->mismatches;
477           double skew = 1.0 * si->qsize / db_getabundance(hit->target);
478           double beta = 1.0 / pow(2, 1.0 * opt_unoise_alpha * d + 1);
479 
480           if (skew <= beta || d == 0)
481             {
482               /* accepted */
483               hit->accepted = true;
484               hit->weak = false;
485               return 1;
486             }
487           else
488             {
489               /* rejected, but weak hit */
490               hit->rejected = true;
491               hit->weak = true;
492               return 0;
493             }
494         }
495       else
496         {
497           if (hit->id >= 100.0 * opt_id)
498             {
499               /* accepted */
500               hit->accepted = true;
501               hit->weak = false;
502               return 1;
503             }
504           else
505             {
506               /* rejected, but weak hit */
507               hit->rejected = true;
508               hit->weak = true;
509               return 0;
510             }
511         }
512     }
513   else
514     {
515       /* rejected */
516       hit->rejected = true;
517       hit->weak = false;
518       return 0;
519     }
520 }
521 
align_delayed(struct searchinfo_s * si)522 void align_delayed(struct searchinfo_s * si)
523 {
524   /* compute global alignment */
525 
526   unsigned int target_list[MAXDELAYED];
527   CELL  nwscore_list[MAXDELAYED];
528   unsigned short nwalignmentlength_list[MAXDELAYED];
529   unsigned short nwmatches_list[MAXDELAYED];
530   unsigned short nwmismatches_list[MAXDELAYED];
531   unsigned short nwgaps_list[MAXDELAYED];
532   char * nwcigar_list[MAXDELAYED];
533 
534   int target_count = 0;
535 
536   for(int x = si->finalized; x < si->hit_count; x++)
537     {
538       struct hit * hit = si->hits + x;
539       if (! hit->rejected) {
540         target_list[target_count++] = hit->target;
541 
542         }
543     }
544 
545   if (target_count) {
546     search16(si->s,
547              target_count,
548              target_list,
549              nwscore_list,
550              nwalignmentlength_list,
551              nwmatches_list,
552              nwmismatches_list,
553              nwgaps_list,
554              nwcigar_list);
555 
556         }
557 
558   int i = 0;
559 
560   for(int x = si->finalized; x < si->hit_count; x++)
561     {
562       /* maxrejects or maxaccepts reached - ignore remaining hits */
563       if ((si->rejects < opt_maxrejects) && (si->accepts < opt_maxaccepts))
564         {
565           struct hit * hit = si->hits + x;
566 
567           if (hit->rejected)
568             {
569               si->rejects++;
570             }
571           else
572             {
573               int64_t target = hit->target;
574               int64_t nwscore = nwscore_list[i];
575 
576               char * nwcigar;
577               int64_t nwalignmentlength;
578               int64_t nwmatches;
579               int64_t nwmismatches;
580               int64_t nwgaps;
581 
582               int64_t dseqlen = db_getsequencelen(target);
583 
584               if (nwscore == SHRT_MAX)
585                 {
586                   /* In case the SIMD aligner cannot align,
587                      perform a new alignment with the
588                      linear memory aligner */
589 
590                   char * dseq = db_getsequence(target);
591 
592                   if (nwcigar_list[i]) {
593                     xfree(nwcigar_list[i]);
594 
595         }
596 
597                   nwcigar = xstrdup(si->lma->align(si->qsequence,
598                                                   dseq,
599                                                   si->qseqlen,
600                                                   dseqlen));
601 
602                   si->lma->alignstats(nwcigar,
603                                       si->qsequence,
604                                       dseq,
605                                       & nwscore,
606                                       & nwalignmentlength,
607                                       & nwmatches,
608                                       & nwmismatches,
609                                       & nwgaps);
610                 }
611               else
612                 {
613                   nwalignmentlength = nwalignmentlength_list[i];
614                   nwmatches = nwmatches_list[i];
615                   nwmismatches = nwmismatches_list[i];
616                   nwgaps = nwgaps_list[i];
617                   nwcigar = nwcigar_list[i];
618                 }
619 
620               hit->aligned = true;
621               hit->shortest = MIN(si->qseqlen, dseqlen);
622               hit->longest = MAX(si->qseqlen, dseqlen);
623               hit->nwalignment = nwcigar;
624               hit->nwscore = nwscore;
625               hit->nwdiff = nwalignmentlength - nwmatches;
626               hit->nwgaps = nwgaps;
627               hit->nwindels = nwalignmentlength - nwmatches - nwmismatches;
628               hit->nwalignmentlength = nwalignmentlength;
629               hit->nwid = 100.0 * (nwalignmentlength - hit->nwdiff) /
630                 nwalignmentlength;
631               hit->matches = nwalignmentlength - hit->nwdiff;
632               hit->mismatches = hit->nwdiff - hit->nwindels;
633 
634               /* trim alignment and compute numbers excluding terminal gaps */
635               align_trim(hit);
636 
637               /* test accept/reject criteria after alignment */
638               if (search_acceptable_aligned(si, hit)) {
639                 si->accepts++;
640               } else {
641                 si->rejects++;
642 
643         }
644 
645               i++;
646             }
647         }
648     }
649 
650   /* free ignored alignments */
651   while (i < target_count) {
652     xfree(nwcigar_list[i++]);
653 
654         }
655 
656   si->finalized = si->hit_count;
657 }
658 
search_onequery(struct searchinfo_s * si,int seqmask)659 void search_onequery(struct searchinfo_s * si, int seqmask)
660 {
661   si->hit_count = 0;
662 
663   search16_qprep(si->s, si->qsequence, si->qseqlen);
664 
665   si->lma = new LinearMemoryAligner;
666 
667   int64_t * scorematrix = si->lma->scorematrix_create(opt_match, opt_mismatch);
668 
669   si->lma->set_parameters(scorematrix,
670                           opt_gap_open_query_left,
671                           opt_gap_open_target_left,
672                           opt_gap_open_query_interior,
673                           opt_gap_open_target_interior,
674                           opt_gap_open_query_right,
675                           opt_gap_open_target_right,
676                           opt_gap_extension_query_left,
677                           opt_gap_extension_target_left,
678                           opt_gap_extension_query_interior,
679                           opt_gap_extension_target_interior,
680                           opt_gap_extension_query_right,
681                           opt_gap_extension_target_right);
682 
683   /* extract unique kmer samples from query*/
684   unique_count(si->uh, opt_wordlength,
685                si->qseqlen, si->qsequence,
686                & si->kmersamplecount, & si->kmersample, seqmask);
687 
688   /* find database sequences with the most kmer hits */
689   search_topscores(si);
690 
691   /* analyse targets with the highest number of kmer hits */
692   si->accepts = 0;
693   si->rejects = 0;
694   si->finalized = 0;
695 
696   int delayed = 0;
697 
698   int t = 0;
699   while ((si->finalized + delayed < opt_maxaccepts + opt_maxrejects - 1) &&
700          (si->rejects < opt_maxrejects) &&
701          (si->accepts < opt_maxaccepts) &&
702          (!minheap_isempty(si->m)))
703     {
704       elem_t e = minheap_poplast(si->m);
705 
706       struct hit * hit = si->hits + si->hit_count;
707 
708       hit->target = e.seqno;
709       hit->count = e.count;
710       hit->strand = si->strand;
711       hit->rejected = false;
712       hit->accepted = false;
713       hit->aligned = false;
714       hit->weak = false;
715       hit->nwalignment = nullptr;
716 
717       /* Test some accept/reject criteria before alignment */
718       if (search_acceptable_unaligned(si, e.seqno))
719         {
720           delayed++;
721         }
722       else
723         {
724           hit->rejected = true;
725         }
726 
727       si->hit_count++;
728 
729       if (delayed == MAXDELAYED)
730         {
731           align_delayed(si);
732           delayed = 0;
733         }
734       t++;
735     }
736   if (delayed > 0) {
737     align_delayed(si);
738 
739         }
740 
741   delete si->lma;
742   xfree(scorematrix);
743 }
744 
search_findbest2_byid(struct searchinfo_s * si_p,struct searchinfo_s * si_m)745 struct hit * search_findbest2_byid(struct searchinfo_s * si_p,
746                                    struct searchinfo_s * si_m)
747 {
748   struct hit * best = nullptr;
749 
750   for(int i=0; i < si_p->hit_count; i++) {
751     if ((!best) || (hit_compare_byid_typed(si_p->hits + i, best) < 0)) {
752       best = si_p->hits + i;
753 
754         }
755 
756         }
757 
758   if (opt_strand>1) {
759     for(int i=0; i < si_m->hit_count; i++) {
760       if ((!best) || (hit_compare_byid_typed(si_m->hits + i, best) < 0)) {
761         best = si_m->hits + i;
762 
763         }
764 
765         }
766 
767         }
768 
769   if (best && ! best->accepted) {
770     best = nullptr;
771 
772         }
773 
774   return best;
775 }
776 
search_findbest2_bysize(struct searchinfo_s * si_p,struct searchinfo_s * si_m)777 struct hit * search_findbest2_bysize(struct searchinfo_s * si_p,
778                                      struct searchinfo_s * si_m)
779 {
780   struct hit * best = nullptr;
781 
782   for(int i=0; i < si_p->hit_count; i++) {
783     if ((!best) || (hit_compare_bysize_typed(si_p->hits + i, best) < 0)) {
784       best = si_p->hits + i;
785 
786         }
787 
788         }
789 
790   if (opt_strand>1) {
791     for(int i=0; i < si_m->hit_count; i++) {
792       if ((!best) || (hit_compare_bysize_typed(si_m->hits + i, best) < 0)) {
793         best = si_m->hits + i;
794 
795         }
796 
797         }
798 
799         }
800 
801   if (best && ! best->accepted) {
802     best = nullptr;
803 
804         }
805 
806   return best;
807 }
808 
search_joinhits(struct searchinfo_s * si_p,struct searchinfo_s * si_m,struct hit ** hitsp,int * hit_count)809 void search_joinhits(struct searchinfo_s * si_p,
810                      struct searchinfo_s * si_m,
811                      struct hit * * hitsp,
812                      int * hit_count)
813 {
814   /* join and sort accepted hits from both strands */
815   /* remove and unallocate unaccepted hits */
816 
817   int a = 0;
818   for (int s = 0; s < opt_strand; s++)
819     {
820       struct searchinfo_s * si = s ? si_m : si_p;
821       for(int i=0; i<si->hit_count; i++) {
822         if (si->hits[i].accepted) {
823           a++;
824 
825         }
826 
827         }
828     }
829 
830   auto * hits = (struct hit *) xmalloc(a * sizeof(struct hit));
831 
832   a = 0;
833 
834   for (int s = 0; s < opt_strand; s++)
835     {
836       struct searchinfo_s * si = s ? si_m : si_p;
837       for(int i=0; i<si->hit_count; i++)
838         {
839           struct hit * h = si->hits + i;
840           if (h->accepted) {
841             hits[a++] = *h;
842           } else if (h->aligned) {
843             xfree(h->nwalignment);
844 
845         }
846         }
847     }
848 
849   qsort(hits, a, sizeof(struct hit), hit_compare_byid);
850 
851   *hitsp = hits;
852   *hit_count = a;
853 }
854