1 /*
2
3 VSEARCH: a versatile open source tool for metagenomics
4
5 Copyright (C) 2014-2021, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
6 All rights reserved.
7
8 Contact: Torbjorn Rognes <torognes@ifi.uio.no>,
9 Department of Informatics, University of Oslo,
10 PO Box 1080 Blindern, NO-0316 Oslo, Norway
11
12 This software is dual-licensed and available under a choice
13 of one of two licenses, either under the terms of the GNU
14 General Public License version 3 or the BSD 2-Clause License.
15
16
17 GNU General Public License version 3
18
19 This program is free software: you can redistribute it and/or modify
20 it under the terms of the GNU General Public License as published by
21 the Free Software Foundation, either version 3 of the License, or
22 (at your option) any later version.
23
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 GNU General Public License for more details.
28
29 You should have received a copy of the GNU General Public License
30 along with this program. If not, see <http://www.gnu.org/licenses/>.
31
32
33 The BSD 2-Clause License
34
35 Redistribution and use in source and binary forms, with or without
36 modification, are permitted provided that the following conditions
37 are met:
38
39 1. Redistributions of source code must retain the above copyright
40 notice, this list of conditions and the following disclaimer.
41
42 2. Redistributions in binary form must reproduce the above copyright
43 notice, this list of conditions and the following disclaimer in the
44 documentation and/or other materials provided with the distribution.
45
46 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
49 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
50 COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
51 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
52 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
54 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
56 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57 POSSIBILITY OF SUCH DAMAGE.
58
59 */
60
61 #include "vsearch.h"
62
63 /* per thread data */
64
hit_compare_byid_typed(struct hit * x,struct hit * y)65 inline int hit_compare_byid_typed(struct hit * x, struct hit * y)
66 {
67 // high id, then low id
68 // early target, then late target
69
70 if (x->rejected < y->rejected) {
71 return -1;
72 } else
73 if (x->rejected > y->rejected) {
74 return +1;
75 } else
76 if (x->rejected == 1) {
77 return 0;
78 } else
79 if (x->aligned > y->aligned) {
80 return -1;
81 } else
82 if (x->aligned < y->aligned) {
83 return +1;
84 } else
85 if (x->aligned == 0) {
86 return 0;
87 } else
88 if (x->id > y->id) {
89 return -1;
90 } else
91 if (x->id < y->id) {
92 return +1;
93 } else
94 if (x->target < y->target) {
95 return -1;
96 } else
97 if (x->target > y->target) {
98 return +1;
99 } else {
100 return 0;
101
102 }
103 }
104
hit_compare_bysize_typed(struct hit * x,struct hit * y)105 inline int hit_compare_bysize_typed(struct hit * x, struct hit * y)
106 {
107 // high abundance, then low abundance
108 // high id, then low id
109 // early target, then late target
110
111 if (x->rejected < y->rejected) {
112 return -1;
113 } else
114 if (x->rejected > y->rejected) {
115 return +1;
116 } else
117 if (x->rejected == 1) {
118 return 0;
119 } else
120 if (x->aligned > y->aligned) {
121 return -1;
122 } else
123 if (x->aligned < y->aligned) {
124 return +1;
125 } else
126 if (x->aligned == 0) {
127 return 0;
128 } else
129 if (db_getabundance(x->target) > db_getabundance(y->target)) {
130 return -1;
131 } else
132 if (db_getabundance(x->target) < db_getabundance(y->target)) {
133 return +1;
134 } else
135 if (x->id > y->id) {
136 return -1;
137 } else
138 if (x->id < y->id) {
139 return +1;
140 } else
141 if (x->target < y->target) {
142 return -1;
143 } else
144 if (x->target > y->target) {
145 return +1;
146 } else {
147 return 0;
148
149 }
150 }
151
hit_compare_byid(const void * a,const void * b)152 int hit_compare_byid(const void * a, const void * b)
153 {
154 return hit_compare_byid_typed((struct hit *) a, (struct hit *) b);
155 }
156
hit_compare_bysize(const void * a,const void * b)157 int hit_compare_bysize(const void * a, const void * b)
158 {
159 return hit_compare_bysize_typed((struct hit *) a, (struct hit *) b);
160 }
161
search_enough_kmers(struct searchinfo_s * si,unsigned int count)162 bool search_enough_kmers(struct searchinfo_s * si,
163 unsigned int count)
164 {
165 return (count >= opt_minwordmatches) || (count >= si->kmersamplecount);
166 }
167
search_topscores(struct searchinfo_s * si)168 void search_topscores(struct searchinfo_s * si)
169 {
170 /*
171 Count the kmer hits in each database sequence and
172 make a sorted list of a given number (th)
173 of the database sequences with the highest number of matching kmers.
174 These are stored in the min heap array.
175 */
176
177 /* count kmer hits in the database sequences */
178 int indexed_count = dbindex_getcount();
179
180 /* zero counts */
181 memset(si->kmers, 0, indexed_count * sizeof(count_t));
182
183 minheap_empty(si->m);
184
185 for(unsigned int i=0; i<si->kmersamplecount; i++)
186 {
187 unsigned int kmer = si->kmersample[i];
188 unsigned char * bitmap = dbindex_getbitmap(kmer);
189
190 if (bitmap)
191 {
192 #ifdef __x86_64__
193 if (ssse3_present) {
194 increment_counters_from_bitmap_ssse3(si->kmers,
195 bitmap, indexed_count);
196 } else {
197 increment_counters_from_bitmap_sse2(si->kmers,
198 bitmap, indexed_count);
199
200 }
201 #else
202 increment_counters_from_bitmap(si->kmers, bitmap, indexed_count);
203 #endif
204 }
205 else
206 {
207 unsigned int * list = dbindex_getmatchlist(kmer);
208 unsigned int count = dbindex_getmatchcount(kmer);
209 for(unsigned int j=0; j < count; j++) {
210 si->kmers[list[j]]++;
211
212 }
213 }
214 }
215
216 int minmatches = MIN(opt_minwordmatches, si->kmersamplecount);
217
218 for(int i=0; i < indexed_count; i++)
219 {
220 count_t count = si->kmers[i];
221 if (count >= minmatches)
222 {
223 unsigned int seqno = dbindex_getmapping(i);
224 unsigned int length = db_getsequencelen(seqno);
225
226 elem_t novel;
227 novel.count = count;
228 novel.seqno = seqno;
229 novel.length = length;
230
231 minheap_add(si->m, & novel);
232 }
233 }
234
235 minheap_sort(si->m);
236 }
237
seqncmp(char * a,char * b,uint64_t n)238 int seqncmp(char * a, char * b, uint64_t n)
239 {
240 for(unsigned int i = 0; i<n; i++)
241 {
242 int x = chrmap_4bit[(int)(a[i])];
243 int y = chrmap_4bit[(int)(b[i])];
244 if (x < y) {
245 return -1;
246 } else if (x > y) {
247 return +1;
248
249 }
250 }
251 return 0;
252 }
253
254
align_trim(struct hit * hit)255 void align_trim(struct hit * hit)
256 {
257 /* trim alignment and fill in info */
258 /* assumes that the hit has been aligned */
259
260 /* info for semi-global alignment (without gaps at ends) */
261
262 hit->trim_aln_left = 0;
263 hit->trim_q_left = 0;
264 hit->trim_t_left = 0;
265 hit->trim_aln_right = 0;
266 hit->trim_q_right = 0;
267 hit->trim_t_right = 0;
268
269 /* left trim alignment */
270
271 char * p = hit->nwalignment;
272 char op;
273 int64_t run;
274 if (*p)
275 {
276 run = 1;
277 int scanlength = 0;
278 sscanf(p, "%" PRId64 "%n", &run, &scanlength);
279 op = *(p+scanlength);
280 if (op != 'M')
281 {
282 hit->trim_aln_left = 1 + scanlength;
283 if (op == 'D') {
284 hit->trim_q_left = run;
285 } else {
286 hit->trim_t_left = run;
287
288 }
289 }
290 }
291
292 /* right trim alignment */
293
294 char * e = hit->nwalignment + strlen(hit->nwalignment);
295 if (e > hit->nwalignment)
296 {
297 p = e - 1;
298 op = *p;
299 if (op != 'M')
300 {
301 while ((p > hit->nwalignment) && (*(p-1) <= '9')) {
302 p--;
303
304 }
305 run = 1;
306 sscanf(p, "%" PRId64, &run);
307 hit->trim_aln_right = e - p;
308 if (op == 'D') {
309 hit->trim_q_right = run;
310 } else {
311 hit->trim_t_right = run;
312
313 }
314 }
315 }
316
317 if (hit->trim_q_left >= hit->nwalignmentlength) {
318 hit->trim_q_right = 0;
319
320 }
321
322 if (hit->trim_t_left >= hit->nwalignmentlength) {
323 hit->trim_t_right = 0;
324
325 }
326
327 hit->internal_alignmentlength = hit->nwalignmentlength
328 - hit->trim_q_left - hit->trim_t_left
329 - hit->trim_q_right - hit->trim_t_right;
330
331 hit->internal_indels = hit->nwindels
332 - hit->trim_q_left - hit->trim_t_left
333 - hit->trim_q_right - hit->trim_t_right;
334
335 hit->internal_gaps = hit->nwgaps
336 - ((hit->trim_q_left + hit->trim_t_left) > 0 ? 1 : 0)
337 - ((hit->trim_q_right + hit->trim_t_right) > 0 ? 1 : 0);
338
339 /* CD-HIT */
340 hit->id0 = hit->shortest > 0 ? 100.0 * hit->matches / hit->shortest : 0.0;
341 /* all diffs */
342 hit->id1 = hit->nwalignmentlength > 0 ?
343 100.0 * hit->matches / hit->nwalignmentlength : 0.0;
344 /* internal diffs */
345 hit->id2 = hit->internal_alignmentlength > 0 ?
346 100.0 * hit->matches / hit->internal_alignmentlength : 0.0;
347 /* Marine Biology Lab */
348 hit->id3 = MAX(0.0, 100.0 * (1.0 - (1.0 * (hit->mismatches + hit->nwgaps) /
349 hit->longest)));
350 /* BLAST */
351 hit->id4 = hit->nwalignmentlength > 0 ?
352 100.0 * hit->matches / hit->nwalignmentlength : 0.0;
353
354 switch (opt_iddef)
355 {
356 case 0:
357 hit->id = hit->id0;
358 break;
359 case 1:
360 hit->id = hit->id1;
361 break;
362 case 2:
363 hit->id = hit->id2;
364 break;
365 case 3:
366 hit->id = hit->id3;
367 break;
368 case 4:
369 hit->id = hit->id4;
370 break;
371 }
372 }
373
search_acceptable_unaligned(struct searchinfo_s * si,int target)374 int search_acceptable_unaligned(struct searchinfo_s * si,
375 int target)
376 {
377 /* consider whether a hit satisfy accept criteria before alignment */
378
379 char * qseq = si->qsequence;
380 char * dlabel = db_getheader(target);
381 char * dseq = db_getsequence(target);
382 int64_t dseqlen = db_getsequencelen(target);
383 int64_t tsize = db_getabundance(target);
384
385 if (
386 /* maxqsize */
387 (si->qsize <= opt_maxqsize)
388 &&
389 /* mintsize */
390 (tsize >= opt_mintsize)
391 &&
392 /* minsizeratio */
393 (si->qsize >= opt_minsizeratio * tsize)
394 &&
395 /* maxsizeratio */
396 (si->qsize <= opt_maxsizeratio * tsize)
397 &&
398 /* minqt */
399 (si->qseqlen >= opt_minqt * dseqlen)
400 &&
401 /* maxqt */
402 (si->qseqlen <= opt_maxqt * dseqlen)
403 &&
404 /* minsl */
405 (si->qseqlen < dseqlen ?
406 si->qseqlen >= opt_minsl * dseqlen :
407 dseqlen >= opt_minsl * si->qseqlen)
408 &&
409 /* maxsl */
410 (si->qseqlen < dseqlen ?
411 si->qseqlen <= opt_maxsl * dseqlen :
412 dseqlen <= opt_maxsl * si->qseqlen)
413 &&
414 /* idprefix */
415 ((si->qseqlen >= opt_idprefix) &&
416 (dseqlen >= opt_idprefix) &&
417 (!seqncmp(qseq, dseq, opt_idprefix)))
418 &&
419 /* idsuffix */
420 ((si->qseqlen >= opt_idsuffix) &&
421 (dseqlen >= opt_idsuffix) &&
422 (!seqncmp(qseq+si->qseqlen-opt_idsuffix,
423 dseq+dseqlen-opt_idsuffix,
424 opt_idsuffix)))
425 &&
426 /* self */
427 ((!opt_self) || (strcmp(si->query_head, dlabel)))
428 &&
429 /* selfid */
430 ((!opt_selfid) ||
431 (si->qseqlen != dseqlen) ||
432 (seqncmp(qseq, dseq, si->qseqlen)))
433 )
434 {
435 /* needs further consideration */
436 return 1;
437 }
438 else
439 {
440 /* reject */
441 return 0;
442 }
443 }
444
search_acceptable_aligned(struct searchinfo_s * si,struct hit * hit)445 int search_acceptable_aligned(struct searchinfo_s * si,
446 struct hit * hit)
447 {
448 if (/* weak_id */
449 (hit->id >= 100.0 * opt_weak_id) &&
450 /* maxsubs */
451 (hit->mismatches <= opt_maxsubs) &&
452 /* maxgaps */
453 (hit->internal_gaps <= opt_maxgaps) &&
454 /* mincols */
455 (hit->internal_alignmentlength >= opt_mincols) &&
456 /* leftjust */
457 ((!opt_leftjust) || (hit->trim_q_left +
458 hit->trim_t_left == 0)) &&
459 /* rightjust */
460 ((!opt_rightjust) || (hit->trim_q_right +
461 hit->trim_t_right == 0)) &&
462 /* query_cov */
463 (hit->matches + hit->mismatches >= opt_query_cov * si->qseqlen) &&
464 /* target_cov */
465 (hit->matches + hit->mismatches >=
466 opt_target_cov * db_getsequencelen(hit->target)) &&
467 /* maxid */
468 (hit->id <= 100.0 * opt_maxid) &&
469 /* mid */
470 (100.0 * hit->matches / (hit->matches + hit->mismatches) >= opt_mid) &&
471 /* maxdiffs */
472 (hit->mismatches + hit->internal_indels <= opt_maxdiffs))
473 {
474 if (opt_cluster_unoise)
475 {
476 int d = hit->mismatches;
477 double skew = 1.0 * si->qsize / db_getabundance(hit->target);
478 double beta = 1.0 / pow(2, 1.0 * opt_unoise_alpha * d + 1);
479
480 if (skew <= beta || d == 0)
481 {
482 /* accepted */
483 hit->accepted = true;
484 hit->weak = false;
485 return 1;
486 }
487 else
488 {
489 /* rejected, but weak hit */
490 hit->rejected = true;
491 hit->weak = true;
492 return 0;
493 }
494 }
495 else
496 {
497 if (hit->id >= 100.0 * opt_id)
498 {
499 /* accepted */
500 hit->accepted = true;
501 hit->weak = false;
502 return 1;
503 }
504 else
505 {
506 /* rejected, but weak hit */
507 hit->rejected = true;
508 hit->weak = true;
509 return 0;
510 }
511 }
512 }
513 else
514 {
515 /* rejected */
516 hit->rejected = true;
517 hit->weak = false;
518 return 0;
519 }
520 }
521
align_delayed(struct searchinfo_s * si)522 void align_delayed(struct searchinfo_s * si)
523 {
524 /* compute global alignment */
525
526 unsigned int target_list[MAXDELAYED];
527 CELL nwscore_list[MAXDELAYED];
528 unsigned short nwalignmentlength_list[MAXDELAYED];
529 unsigned short nwmatches_list[MAXDELAYED];
530 unsigned short nwmismatches_list[MAXDELAYED];
531 unsigned short nwgaps_list[MAXDELAYED];
532 char * nwcigar_list[MAXDELAYED];
533
534 int target_count = 0;
535
536 for(int x = si->finalized; x < si->hit_count; x++)
537 {
538 struct hit * hit = si->hits + x;
539 if (! hit->rejected) {
540 target_list[target_count++] = hit->target;
541
542 }
543 }
544
545 if (target_count) {
546 search16(si->s,
547 target_count,
548 target_list,
549 nwscore_list,
550 nwalignmentlength_list,
551 nwmatches_list,
552 nwmismatches_list,
553 nwgaps_list,
554 nwcigar_list);
555
556 }
557
558 int i = 0;
559
560 for(int x = si->finalized; x < si->hit_count; x++)
561 {
562 /* maxrejects or maxaccepts reached - ignore remaining hits */
563 if ((si->rejects < opt_maxrejects) && (si->accepts < opt_maxaccepts))
564 {
565 struct hit * hit = si->hits + x;
566
567 if (hit->rejected)
568 {
569 si->rejects++;
570 }
571 else
572 {
573 int64_t target = hit->target;
574 int64_t nwscore = nwscore_list[i];
575
576 char * nwcigar;
577 int64_t nwalignmentlength;
578 int64_t nwmatches;
579 int64_t nwmismatches;
580 int64_t nwgaps;
581
582 int64_t dseqlen = db_getsequencelen(target);
583
584 if (nwscore == SHRT_MAX)
585 {
586 /* In case the SIMD aligner cannot align,
587 perform a new alignment with the
588 linear memory aligner */
589
590 char * dseq = db_getsequence(target);
591
592 if (nwcigar_list[i]) {
593 xfree(nwcigar_list[i]);
594
595 }
596
597 nwcigar = xstrdup(si->lma->align(si->qsequence,
598 dseq,
599 si->qseqlen,
600 dseqlen));
601
602 si->lma->alignstats(nwcigar,
603 si->qsequence,
604 dseq,
605 & nwscore,
606 & nwalignmentlength,
607 & nwmatches,
608 & nwmismatches,
609 & nwgaps);
610 }
611 else
612 {
613 nwalignmentlength = nwalignmentlength_list[i];
614 nwmatches = nwmatches_list[i];
615 nwmismatches = nwmismatches_list[i];
616 nwgaps = nwgaps_list[i];
617 nwcigar = nwcigar_list[i];
618 }
619
620 hit->aligned = true;
621 hit->shortest = MIN(si->qseqlen, dseqlen);
622 hit->longest = MAX(si->qseqlen, dseqlen);
623 hit->nwalignment = nwcigar;
624 hit->nwscore = nwscore;
625 hit->nwdiff = nwalignmentlength - nwmatches;
626 hit->nwgaps = nwgaps;
627 hit->nwindels = nwalignmentlength - nwmatches - nwmismatches;
628 hit->nwalignmentlength = nwalignmentlength;
629 hit->nwid = 100.0 * (nwalignmentlength - hit->nwdiff) /
630 nwalignmentlength;
631 hit->matches = nwalignmentlength - hit->nwdiff;
632 hit->mismatches = hit->nwdiff - hit->nwindels;
633
634 /* trim alignment and compute numbers excluding terminal gaps */
635 align_trim(hit);
636
637 /* test accept/reject criteria after alignment */
638 if (search_acceptable_aligned(si, hit)) {
639 si->accepts++;
640 } else {
641 si->rejects++;
642
643 }
644
645 i++;
646 }
647 }
648 }
649
650 /* free ignored alignments */
651 while (i < target_count) {
652 xfree(nwcigar_list[i++]);
653
654 }
655
656 si->finalized = si->hit_count;
657 }
658
search_onequery(struct searchinfo_s * si,int seqmask)659 void search_onequery(struct searchinfo_s * si, int seqmask)
660 {
661 si->hit_count = 0;
662
663 search16_qprep(si->s, si->qsequence, si->qseqlen);
664
665 si->lma = new LinearMemoryAligner;
666
667 int64_t * scorematrix = si->lma->scorematrix_create(opt_match, opt_mismatch);
668
669 si->lma->set_parameters(scorematrix,
670 opt_gap_open_query_left,
671 opt_gap_open_target_left,
672 opt_gap_open_query_interior,
673 opt_gap_open_target_interior,
674 opt_gap_open_query_right,
675 opt_gap_open_target_right,
676 opt_gap_extension_query_left,
677 opt_gap_extension_target_left,
678 opt_gap_extension_query_interior,
679 opt_gap_extension_target_interior,
680 opt_gap_extension_query_right,
681 opt_gap_extension_target_right);
682
683 /* extract unique kmer samples from query*/
684 unique_count(si->uh, opt_wordlength,
685 si->qseqlen, si->qsequence,
686 & si->kmersamplecount, & si->kmersample, seqmask);
687
688 /* find database sequences with the most kmer hits */
689 search_topscores(si);
690
691 /* analyse targets with the highest number of kmer hits */
692 si->accepts = 0;
693 si->rejects = 0;
694 si->finalized = 0;
695
696 int delayed = 0;
697
698 int t = 0;
699 while ((si->finalized + delayed < opt_maxaccepts + opt_maxrejects - 1) &&
700 (si->rejects < opt_maxrejects) &&
701 (si->accepts < opt_maxaccepts) &&
702 (!minheap_isempty(si->m)))
703 {
704 elem_t e = minheap_poplast(si->m);
705
706 struct hit * hit = si->hits + si->hit_count;
707
708 hit->target = e.seqno;
709 hit->count = e.count;
710 hit->strand = si->strand;
711 hit->rejected = false;
712 hit->accepted = false;
713 hit->aligned = false;
714 hit->weak = false;
715 hit->nwalignment = nullptr;
716
717 /* Test some accept/reject criteria before alignment */
718 if (search_acceptable_unaligned(si, e.seqno))
719 {
720 delayed++;
721 }
722 else
723 {
724 hit->rejected = true;
725 }
726
727 si->hit_count++;
728
729 if (delayed == MAXDELAYED)
730 {
731 align_delayed(si);
732 delayed = 0;
733 }
734 t++;
735 }
736 if (delayed > 0) {
737 align_delayed(si);
738
739 }
740
741 delete si->lma;
742 xfree(scorematrix);
743 }
744
search_findbest2_byid(struct searchinfo_s * si_p,struct searchinfo_s * si_m)745 struct hit * search_findbest2_byid(struct searchinfo_s * si_p,
746 struct searchinfo_s * si_m)
747 {
748 struct hit * best = nullptr;
749
750 for(int i=0; i < si_p->hit_count; i++) {
751 if ((!best) || (hit_compare_byid_typed(si_p->hits + i, best) < 0)) {
752 best = si_p->hits + i;
753
754 }
755
756 }
757
758 if (opt_strand>1) {
759 for(int i=0; i < si_m->hit_count; i++) {
760 if ((!best) || (hit_compare_byid_typed(si_m->hits + i, best) < 0)) {
761 best = si_m->hits + i;
762
763 }
764
765 }
766
767 }
768
769 if (best && ! best->accepted) {
770 best = nullptr;
771
772 }
773
774 return best;
775 }
776
search_findbest2_bysize(struct searchinfo_s * si_p,struct searchinfo_s * si_m)777 struct hit * search_findbest2_bysize(struct searchinfo_s * si_p,
778 struct searchinfo_s * si_m)
779 {
780 struct hit * best = nullptr;
781
782 for(int i=0; i < si_p->hit_count; i++) {
783 if ((!best) || (hit_compare_bysize_typed(si_p->hits + i, best) < 0)) {
784 best = si_p->hits + i;
785
786 }
787
788 }
789
790 if (opt_strand>1) {
791 for(int i=0; i < si_m->hit_count; i++) {
792 if ((!best) || (hit_compare_bysize_typed(si_m->hits + i, best) < 0)) {
793 best = si_m->hits + i;
794
795 }
796
797 }
798
799 }
800
801 if (best && ! best->accepted) {
802 best = nullptr;
803
804 }
805
806 return best;
807 }
808
search_joinhits(struct searchinfo_s * si_p,struct searchinfo_s * si_m,struct hit ** hitsp,int * hit_count)809 void search_joinhits(struct searchinfo_s * si_p,
810 struct searchinfo_s * si_m,
811 struct hit * * hitsp,
812 int * hit_count)
813 {
814 /* join and sort accepted hits from both strands */
815 /* remove and unallocate unaccepted hits */
816
817 int a = 0;
818 for (int s = 0; s < opt_strand; s++)
819 {
820 struct searchinfo_s * si = s ? si_m : si_p;
821 for(int i=0; i<si->hit_count; i++) {
822 if (si->hits[i].accepted) {
823 a++;
824
825 }
826
827 }
828 }
829
830 auto * hits = (struct hit *) xmalloc(a * sizeof(struct hit));
831
832 a = 0;
833
834 for (int s = 0; s < opt_strand; s++)
835 {
836 struct searchinfo_s * si = s ? si_m : si_p;
837 for(int i=0; i<si->hit_count; i++)
838 {
839 struct hit * h = si->hits + i;
840 if (h->accepted) {
841 hits[a++] = *h;
842 } else if (h->aligned) {
843 xfree(h->nwalignment);
844
845 }
846 }
847 }
848
849 qsort(hits, a, sizeof(struct hit), hit_compare_byid);
850
851 *hitsp = hits;
852 *hit_count = a;
853 }
854