1 /* $Id: pair.h 222194 2020-03-23 13:44:44Z twu $ */
2 #ifndef PAIR_INCLUDED
3 #define PAIR_INCLUDED
4 
/* Handle type for this module: Pair_T is a pointer to struct Pair_T.
   The struct layout is not defined in the visible part of this header.
   The typedef is placed ahead of the #include lines that follow it;
   NOTE(review): presumably so those headers can name Pair_T -- confirm. */
5 typedef struct Pair_T *Pair_T;
6 
7 #include "bool.h"
8 #include "genomicpos.h"
9 #include "chrnum.h"
10 #include "list.h"
11 #include "iit-read-univ.h"
12 #include "iit-read.h"
13 #include "sequence.h"
14 #include "reader.h"		/* For cDNAEnd_T */
15 #include "uintlist.h"
16 #include "genome.h"
17 #include "chimera.h"
18 #include "filestring.h"
19 #include "pairpool.h"
20 
21 
/* Integer constant 3.  No prototype in this header uses it, so its role
   is not visible here.  NOTE(review): the name suggests a scoring ratio of
   matches per gap -- confirm against the users of this macro. */
22 #define MATCHESPERGAP 3
23 
/* CDStype_T: two-valued selector, passed to Pair_setup() as cdstype_in.
   Cigar_action_T: four-valued selector, passed to Pair_setup() as
   cigar_action_in.  Neither enum assigns explicit values, so enumerators
   number from 0 in declaration order.
   NOTE(review): presumably CDStype_T picks cDNA vs. genomic coordinates for
   CDS output, and Cigar_action_T picks the reaction to a problematic CIGAR
   string -- neither is established by this header; confirm in pair.c. */
24 typedef enum {CDS_CDNA, CDS_GENOMIC} CDStype_T;
25 typedef enum {CIGAR_ACTION_IGNORE, CIGAR_ACTION_WARNING, CIGAR_ACTION_NOPRINT, CIGAR_ACTION_ABORT} Cigar_action_T;
26 
27 
/* Header-local shorthand, removed again by the #undef T at the end of this
   file.  In the prototypes below, "T" expands to Pair_T and "struct T *"
   expands to "struct Pair_T *".  Both spell the same pointer type; the
   prototypes use "T" for single-pair arguments and "struct T *" mostly
   alongside an npairs count. */
28 #define T Pair_T
29 
/* Module setup routine.  Takes twelve configuration values, each named with
   an _in suffix, and returns nothing.
   NOTE(review): presumably each value is stored in file-scope state in
   pair.c and must be set before the other Pair_* routines are used --
   confirm; the storage is not visible from this header. */
30 extern void
31 Pair_setup (bool novelsplicingp_in, IIT_T splicesites_iit_in, int trim_indel_score_in,
32 	    bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
33 	    bool md_lowercase_variant_p_in, bool snps_p_in,
34 	    bool gff3_phase_swap_p_in, CDStype_T cdstype_in,
35 	    bool cigar_extended_p_in, Cigar_action_T cigar_action_in);
/* Eight declarations that each take one Pair_T and return a scalar
   (int, Chrpos_T, char or bool).
   NOTE(review): presumably plain field accessors whose names mirror the
   struct members (querypos, genomepos, cdna, comp, genome, genomealt, gapp,
   shortexonp) -- the struct is not visible here; confirm in pair.c. */
36 extern int
37 Pair_querypos (T this);
38 extern Chrpos_T
39 Pair_genomepos (T this);
40 extern char
41 Pair_cdna (T this);
42 extern char
43 Pair_comp (T this);
44 extern char
45 Pair_genome (T this);
46 extern char
47 Pair_genomealt (T this);
48 extern bool
49 Pair_gapp (T this);
50 extern bool
51 Pair_shortexonp (T this);
/* Takes a List_T of pairs and returns nothing.  The prototype has no output
   handle, so the destination of whatever is printed is not visible here. */
52 extern void
53 Pair_print_ends (List_T pairs);
54 
/* Operates on a pair array (pointer plus npairs) given chromosome bounds
   (chroffset, chrhigh) and strand flag watsonp; returns nothing.
   The first parameter is spelled "struct Pair_T *" rather than "struct T *";
   with the T macro in effect the two spellings are the same type.
   NOTE(review): presumably rewrites each pair's genomic position in place --
   confirm. */
55 extern void
56 Pair_set_genomepos (struct Pair_T *pairarray, int npairs, Univcoord_T chroffset,
57 		    Univcoord_T chrhigh, bool watsonp);
/* Operates on a pair array with a Chrpos_T adjustment; returns nothing.
   NOTE(review): presumably subtracts adjustment from every pair's genomic
   position in place -- confirm. */
58 extern void
59 Pair_subtract_genomepos (struct T *pairs, int npairs, Chrpos_T adjustment);
60 
/* Disabled with #if 0: the compiler never sees this prototype.  It is a
   List_T-based variant of Pair_set_genomepos().
   NOTE(review): kept only as a reference; consider removing it if no
   definition remains in pair.c. */
61 #if 0
62 extern void
63 Pair_set_genomepos_list (List_T pairs, Univcoord_T chroffset, Univcoord_T chrhigh,
64 			 bool watsonp);
65 #endif
/* Two list-based clip routines: each takes a source list and an integer
   [minpos, maxpos] range and returns a List_T.
   NOTE(review): the _5/_3 suffixes presumably denote the 5' and 3' ends, and
   minpos/maxpos presumably refer to query positions -- confirm.  Whether the
   returned list shares cells with source is not visible here. */
66 extern List_T
67 Pair_clip_bounded_list_5 (List_T source, int minpos, int maxpos);
68 extern List_T
69 Pair_clip_bounded_list_3 (List_T source, int minpos, int maxpos);
/* Array-based variant: takes the array, its length and the same range, and
   returns an int.  NOTE(review): presumably the clipped pair count, with the
   array modified in place -- confirm. */
70 extern int
71 Pair_clip_bounded_array (struct T *source, int npairs, int minpos, int maxpos);
72 
/* Three "protect" routines over a List_T of pairs.  The end5/end3 variants
   return a List_T; the _list variant returns nothing.
   NOTE(review): presumably they set a "protected" marker on pairs at the
   5' end, the 3' end, or throughout the list -- the marker and its effect
   are not visible in this header; confirm in pair.c. */
73 extern List_T
74 Pair_protect_end5 (List_T pairs);
75 extern List_T
76 Pair_protect_end3 (List_T pairs);
77 extern void
78 Pair_protect_list (List_T pairs);
79 
/* Constructor/destructor pair.  Pair_new_out() builds a Pair_T from a query
   position, a genomic position and three characters (cdna, comp, genome).
   Pair_free_out() takes the address of a Pair_T.
   NOTE(review): the "_out" suffix presumably means allocation outside the
   Pairpool_T allocator, with Pair_free_out() releasing the object and
   clearing *old -- confirm in pair.c. */
80 extern T
81 Pair_new_out (int querypos, Chrpos_T genomepos, char cdna, char comp, char genome);
82 extern void
83 Pair_free_out (T *old);
84 
/* Takes a pair array and returns an int.  NOTE(review): presumably the
   length of the translated (amino-acid) region -- units not visible here. */
85 extern int
86 Pair_translation_length (struct T *pairs, int npairs);
/* Printing routine: first argument is a Filestring_T, followed by the pair
   array and formatting flags; returns nothing.
   NOTE(review): output is presumably appended to fp; the meaning of the
   invertmode values is not visible here -- confirm. */
87 extern void
88 Pair_print_continuous (Filestring_T fp, struct T *pairs, int npairs, bool watsonp,
89 		       bool genomefirstp, int invertmode, bool nointronlenp);
90 
/* Printing routine with a subset of Pair_print_continuous()'s parameters
   (no genomefirstp, no nointronlenp).  NOTE(review): presumably the same
   layout emitted exon by exon -- confirm. */
91 extern void
92 Pair_print_continuous_byexon (Filestring_T fp, struct T *pairs, int npairs, bool watsonp, int invertmode);
/* Printing routine that additionally takes chromosome identity (chrnum,
   chroffset, chromosome_iit) and a wraplength.
   NOTE(review): wraplength is presumably the number of alignment columns per
   output line -- confirm. */
93 extern void
94 Pair_print_alignment (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
95 		      Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool watsonp,
96 		      int invertmode, bool nointronlenp, int wraplength);
97 
/* Printing routine for one path, identified by pathnum and bounded by the
   pairs start and end.  All statistics (nexons, matches, unknowns,
   mismatches, qopens, qindels, topens, tindels, translation bounds) are
   passed in by value; the prototype has no output parameters besides fp.
   NOTE(review): presumably writes a human-readable summary to fp; the
   counts are presumably those produced by Pair_fracidentity() -- confirm. */
98 extern void
99 Pair_print_pathsummary (Filestring_T fp, int pathnum, T start, T end, Chrnum_T chrnum,
100 			Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool referencealignp,
101 			IIT_T altstrain_iit, char *strain, Univ_IIT_T contig_iit, char *dbversion,
102 			int querylength_given, int skiplength, int trim_start, int trim_end,
103 			int nexons, int matches, int unknowns, int mismatches,
104 			int qopens, int qindels, int topens, int tindels,
105 			bool watsonp, int cdna_direction,
106 			int translation_start, int translation_end, int translation_length,
107 			int relaastart, int relaaend);
108 
/* Printing routine over a pair array with chromosome identity and strand.
   NOTE(review): presumably emits per-position query/genome coordinates to
   fp -- the output format is not visible here; confirm. */
109 extern void
110 Pair_print_coordinates (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
111 			Univcoord_T chroffset, Univ_IIT_T chromosome_iit,
112 			bool watsonp, int invertmode);
113 
/* Comparator with the (const void *, const void *) -> int shape that
   qsort() and bsearch() expect.  NOTE(review): the element type being
   compared and the ordering key are not visible here -- confirm in pair.c
   before using it on a new array type. */
114 extern int
115 Pair_cmp (const void *a, const void *b);
116 
/* Dump routines.  None takes an output handle, so the destination is not
   visible from the prototypes.  NOTE(review): presumably debugging aids,
   with only the _stderr variant named for its stream; zerobasedp presumably
   selects 0-based vs. 1-based coordinates -- confirm. */
117 extern void
118 Pair_dump_one (T this, bool zerobasedp);
119 extern void
120 Pair_dump_list (List_T pairs, bool zerobasedp);
121 extern void
122 Pair_dump_array (struct T *pairs, int npairs, bool zerobasedp);
123 extern void
124 Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp);
125 extern void
126 Pair_dump_genome_array (struct T *pairs, int npairs);
127 extern void
128 Pair_dump_comp_array (struct T *pairs, int npairs);
/* Takes a pair array, a querypos and a headp flag; returns a Chrpos_T.
   NOTE(review): presumably the genomic position aligned to querypos; the
   role of headp is not visible here -- confirm. */
129 extern Chrpos_T
130 Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp);
/* Takes a pair array, an amino-acid position and cdna_direction; returns an
   int.  NOTE(review): the meaning of the return value is not visible here
   -- confirm. */
131 extern int
132 Pair_codon_changepos (struct T *pairs, int npairs, int aapos, int cdna_direction);
133 
/* Takes two pair lists and returns a bool.  NOTE(review): which fields are
   compared is not visible here -- confirm. */
134 extern bool
135 Pair_identical_p (List_T pairs1, List_T pairs2);
/* Consistency checks.  The list variants return nothing, the array variants
   return a bool.  NOTE(review): "pairs" vs. "path" presumably refers to the
   two list orders (forward vs. reversed); how the list variants report a
   failure and the polarity of the array variants' result are not visible
   here -- confirm. */
136 extern void
137 Pair_check_list_pairs (List_T pairs);
138 extern void
139 Pair_check_list_path (List_T path);
140 extern bool
141 Pair_check_array_pairs (struct T *pairs, int npairs);
142 extern bool
143 Pair_check_array_path (struct T *path, int npairs);
144 
/* Printing routine over a pair array.  This is the only printing prototype
   in this header that takes a Genome_T.  NOTE(review): presumably genome is
   read to report flanking (splice-site) nucleotides per exon -- confirm. */
145 extern void
146 Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
147 			Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit,
148 			bool watsonp, int cdna_direction, bool genomefirstp, int invertmode);
149 
/* Helpers over a List_T named "tokens".  NOTE(review): the element type is
   presumably char * CIGAR/MD tokens -- confirm.
   Pair_cigar_length() has the same signature as Pair_tokens_cigarlength(),
   declared later in this header; whether the two compute different things
   is not visible here. */
150 extern int
151 Pair_cigar_length (List_T tokens);
152 extern void
153 Pair_print_tokens (Filestring_T fp, List_T tokens);
/* Takes the address of the list.  NOTE(review): presumably frees the tokens
   and the list cells and clears *tokens -- confirm. */
154 extern void
155 Pair_tokens_free (List_T *tokens);
/* Returns a List_T.  NOTE(review): whether the copy is deep (tokens
   duplicated) or shallow is not visible here -- confirm before freeing. */
156 extern List_T
157 Pair_tokens_copy (List_T old);
158 
/* Printing routine for GFF3 output of one path.  Two flags,
   gff_gene_format_p and gff_estmatch_format_p, are passed.
   NOTE(review): they presumably select between GFF3 flavours (gene/mRNA/exon
   records vs. EST_match records), with a third flavour when both are false
   -- confirm; sourcename presumably fills the GFF3 "source" column. */
159 extern void
160 Pair_print_gff3 (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *accession, char *restofheader,
161 		 T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
162 		 int translation_end,
163 		 int querylength_given, int skiplength, int matches, int mismatches,
164 		 int qindels, int tindels, int unknowns, bool watsonp, int cdna_direction,
165 		 bool gff_gene_format_p, bool gff_estmatch_format_p, char *sourcename);
166 
/* Declared only when GSNAP is defined.  Uses Shortread_T, which none of the
   #include lines at the top of this header obviously provides.
   NOTE(review): a GSNAP build presumably gets Shortread_T from a header
   included earlier by the including .c file -- confirm.  "m8" presumably
   refers to BLAST tabular (-m 8) output -- confirm. */
167 #ifdef GSNAP
168 extern void
169 Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp,
170 	       Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq,
171 	       char *acc_suffix, Univ_IIT_T chromosome_iit);
172 #endif
173 
/* Declared only when PMAP is not defined.  Printing routine over a pair
   array named pairs_querydir.  NOTE(review): the name presumably means the
   array is ordered in query direction; output is presumably BEDPE records
   -- confirm. */
174 #ifndef PMAP
175 extern void
176 Pair_print_bedpe (Filestring_T fp, struct T *pairs_querydir, int npairs,
177 		  Chrnum_T chrnum, bool watsonp, Univ_IIT_T chromosome_iit);
178 #endif
179 
/* Takes a pair array and a cdna_direction; returns nothing.
   NOTE(review): presumably rewrites intron/gap symbols in place to agree
   with the given direction -- confirm. */
180 extern void
181 Pair_fix_cdna_direction_array (struct T *pairs_querydir, int npairs, int cdna_direction);
/* Array and list variants of the same query: each returns an int and writes
   *sensedir.  NOTE(review): the return value is presumably the inferred
   cdna_direction; the encodings of both values are not visible here --
   confirm. */
182 extern int
183 Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int npairs, bool invertedp,
184 				 Univcoord_T chroffset, bool watsonp);
185 extern int
186 Pair_guess_cdna_direction (int *sensedir, List_T pairs, bool invertedp,
187 			   Univcoord_T chroffset, bool watsonp);
/* Returns an int and has four int * parameters ahead of the pair array.
   NOTE(review): presumably returns the segment count and stores totals in
   the four pointers -- confirm. */
188 extern int
189 Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
190 		      int *nindelbreaks, struct T *pairs, int npairs, int querylength);
/* Same signature as Pair_cigar_length() declared earlier in this header;
   whether the two differ is not visible here. */
191 extern int
192 Pair_tokens_cigarlength (List_T tokens);
193 
194 
/* Helpers for circular chromosomes; each takes the chromosome length.
   Pair_circularpos() returns an int and has an int *alias parameter.
   NOTE(review): presumably returns a query position at which the alignment
   crosses the chromosome end, and stores an alias indicator in *alias --
   the encodings are not visible here; confirm. */
195 extern int
196 Pair_circularpos (int *alias, struct T *pairs, int npairs, Chrpos_T chrlength, bool plusp, int querylength);
/* NOTE(review): presumably these shift every pair's genomic position by
   chrlength in opposite directions, in place -- confirm which is which. */
197 extern void
198 Pair_alias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
199 extern void
200 Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
201 
/* Printing routine that takes read data only (accessions, sequence, quality
   string and pairing flags) and no pair array or chromosome information.
   NOTE(review): presumably emits a SAM record for an unaligned read;
   quality_shift is presumably an offset applied to quality characters --
   confirm. */
202 extern void
203 Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
204 			  char *quality_string, int querylength, int quality_shift,
205 			  bool first_read_p, bool sam_paired_p, char *sam_read_group_id);
206 
/* Takes a pair array and hard-clip amounts for both ends; returns a pair
   array pointer and has an int *clipped_npairs parameter.
   NOTE(review): presumably writes the clipped count to *clipped_npairs and
   returns a pointer into the input array rather than a new allocation --
   confirm before freeing the result. */
207 extern struct T *
208 Pair_hardclip (int *clipped_npairs, int hardclip_start, int hardclip_end,
209 	       struct T *pairs, int npairs, int querylength);
210 
/* Takes a token list and a strand flag; returns a List_T.
   NOTE(review): whether the input list is consumed or left intact is not
   visible here -- confirm ownership. */
211 extern List_T
212 Pair_clean_cigar (List_T tokens, bool watsonp);
/* Returns a List_T (presumably CIGAR tokens) built from a pair array.
   intronp, hardclip_start and hardclip_end are pointers.
   NOTE(review): intronp is presumably output-only, and the hardclip values
   are presumably read and possibly adjusted; chimera_part's encoding is not
   visible here -- confirm. */
213 extern List_T
214 Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
215 		    bool watsonp, int chimera_part);
216 
/* Printing routine for one aligned path.  Takes the pair array, read data,
   hard-clip amounts, chimera information and mapping-quality scores.
   As in Pair_set_genomepos(), the array parameter is spelled
   "struct Pair_T *", the same type as "struct T *" here.
   NOTE(review): presumably emits one SAM record to fp; hardclip_low/high are
   presumably in genomic (low-to-high) order, unlike the start/end naming
   used by Pair_hardclip() -- confirm. */
217 extern void
218 Pair_print_sam (Filestring_T fp, char *abbrev, struct Pair_T *pairarray, int npairs,
219 		char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
220 		char *queryseq_ptr, char *quality_string,
221 		int hardclip_low, int hardclip_high, int querylength_given,
222 		bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
223 		int quality_shift, bool first_read_p, int pathnum, int npaths_primary, int npaths_altloc,
224 		int absmq_score, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
225 		int mapq_score, bool sam_paired_p, char *sam_read_group_id);
226 
/* Returns a List_T and has three int * parameters; consumes a pair array
   together with an existing cigar_tokens list.
   NOTE(review): presumably returns tokens of the SAM MD string and stores
   mismatch/indel counts in the three pointers; the refdiff vs. bothdiff
   distinction is not visible here -- confirm. */
227 extern List_T
228 Pair_compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nindels,
229 			struct T *pairs, int npairs, bool watsonp, List_T cigar_tokens);
230 
/* Takes a pair array and returns a Uintlist_T.
   NOTE(review): presumably a list of exon boundary positions; their order
   and coordinate system are not visible here -- confirm.  The caller
   presumably owns the returned list. */
231 extern Uintlist_T
232 Pair_exonbounds (struct T *pairs, int npairs);
233 
/* Printing routine taking the pair array, its bounding pairs (start, end),
   the query sequence, and precomputed matches/unknowns/mismatches counts.
   NOTE(review): presumably emits one PSL line for a nucleotide alignment --
   confirm. */
234 extern void
235 Pair_print_pslformat_nt (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
236 			 Sequence_T queryseq, Chrnum_T chrnum,
237 			 Univ_IIT_T chromosome_iit, Sequence_T usersegment,
238 			 int matches, int unknowns, int mismatches,
239 			 bool watsonp);
240 
241 
/* Like Pair_print_pslformat_nt() but takes cdna_direction and no
   match/mismatch counts.  NOTE(review): presumably the protein flavour of
   PSL output, computing its own counts -- confirm. */
242 extern void
243 Pair_print_pslformat_pro (Filestring_T fp, struct T *pairs, int npairs, T start, T end,
244 			  Sequence_T queryseq, Chrnum_T chrnum,
245 			  Univ_IIT_T chromosome_iit, Sequence_T usersegment,
246 			  bool watsonp, int cdna_direction);
247 
/* Printing routine over a pair array with wraplength, ngap and cdnap.
   NOTE(review): cdnap presumably selects cDNA vs. genomic sequence, and
   ngap presumably controls gap characters between exons -- confirm. */
248 extern void
249 Pair_print_exons (Filestring_T fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap);
250 
/* Sequence printers over a pair array.  Pair_print_protein_genomic() is
   always declared.  The second prototype depends on the build: with PMAP
   defined it is Pair_print_nucleotide_cdna() (no forwardp); otherwise it is
   Pair_print_protein_cdna().
   NOTE(review): presumably because in PMAP the query is already protein --
   confirm. */
251 extern void
252 Pair_print_protein_genomic (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
253 #ifdef PMAP
254 extern void
255 Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength);
256 #else
257 extern void
258 Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp);
259 #endif
260 
/* Printing routine for one path (pathnum of npaths) that also takes chimera
   details (chimerapos, chimeraequivpos, donor_prob, acceptor_prob,
   chimera_cdna_direction) and a checksump flag.
   NOTE(review): presumably emits a compact, machine-readable record;
   checksump presumably adds a sequence checksum -- confirm. */
261 extern void
262 Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
263 		       Sequence_T usersegment, int nexons, double fracidentity,
264 		       struct T *pairs, int npairs, Chrnum_T chrnum,
265 		       Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
266 		       int skiplength, int trim_start, int trim_end, bool checksump,
267 		       int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob,
268 		       int chimera_cdna_direction, char *strain, int cdna_direction);
269 
/* Five printing routines.  NOTE(review): the names suggest map-style output
   (whole alignment, per exon, splice sites, introns, intron-masked
   sequence); none of the formats is visible here -- confirm in pair.c. */
/* Takes only the bounding pairs (start, end), not the whole array. */
270 extern void
271 Pair_print_iit_map (Filestring_T fp, Sequence_T queryseq, char *accession,
272 		    T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
273 extern void
274 Pair_print_iit_exon_map (Filestring_T fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession,
275 			 T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
/* Pair_print_splicesites() takes watsonp; Pair_print_introns() does not. */
276 extern void
277 Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *accession,
278 			int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, bool watsonp);
279 extern void
280 Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession,
281 		    int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
282 extern void
283 Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs,
284 			 Chrpos_T chrlength, int wraplength, bool include_utr_p);
285 
286 
/* List and array variants taking a [pos5, pos3] range and returning an int.
   Only the list variant has the int *max_match_length parameter.
   NOTE(review): presumably count matches between the trimmed query
   positions; range inclusivity is not visible here -- confirm. */
287 extern int
288 Pair_nmatches_posttrim (int *max_match_length, List_T pairs, int pos5, int pos3);
289 extern int
290 Pair_array_nmatches_posttrim (struct T *pairs, int npairs, int pos5, int pos3);
/* Returns an int and has two int * parameters (nindelbreaks, nbadintrons).
   NOTE(review): presumably returns the mismatch count inside the trimmed
   region and stores the other two counts through the pointers -- confirm. */
291 extern int
292 Pair_nmismatches_region (int *nindelbreaks, int *nbadintrons, struct T *pairs, int npairs,
293 			 int trim_left, int trim_right, int start_amb_nmatches, int end_amb_nmatches,
294 			 int querylength);
295 
/* Takes a pair list and returns an int.  NOTE(review): the scoring scheme
   is not visible here -- confirm. */
296 extern int
297 Pair_goodness_simple (List_T pairs);
/* Identity statistics.  Every int * / double * parameter appears ahead of
   the input (list or array).  NOTE(review): they are presumably pure
   outputs, zeroed by the callee -- confirm before relying on it.
   qopens/qindels and topens/tindels presumably count gap openings and gap
   lengths on the query and target sides. */
298 extern void
299 Pair_fracidentity_simple (int *matches, int *unknowns, int *mismatches, List_T pairs);
300 extern void
301 Pair_fracidentity (int *matches, int *unknowns, int *mismatches,
302 		   int *qopens, int *qindels, int *topens, int *tindels,
303 		   int *ncanonical, int *nsemicanonical, int *nnoncanonical,
304 		   double *min_splice_prob, List_T pairs, int cdna_direction);
/* Array variant; unlike the list variant it returns an int, whose meaning
   is not visible here. */
305 extern int
306 Pair_fracidentity_array (int *matches, int *unknowns, int *mismatches, int *qopens, int *qindels,
307 			 int *topens, int *tindels, int *ncanonical, int *nsemicanonical, int *nnoncanonical,
308 			 double *min_splice_prob, struct T *ptr, int npairs, int cdna_direction);
309 extern int
310 Pair_fracidentity_score (List_T pairs);
311 
/* Takes a pair list and cdna_direction; returns a double.
   NOTE(review): presumably an error fraction in [0, 1]; the numerator,
   denominator and empty-list behaviour are not visible here -- confirm. */
312 extern double
313 Pair_frac_error (List_T pairs, int cdna_direction);
314 
/* Same counters as Pair_fracidentity() except min_splice_prob, over a pair
   array and with an added [minpos, maxpos] range.
   NOTE(review): the range presumably restricts counting to those query
   positions -- inclusivity not visible here; confirm. */
315 extern void
316 Pair_fracidentity_bounded (int *matches, int *unknowns, int *mismatches,
317 			   int *qopens, int *qindels, int *topens, int *tindels,
318 			   int *ncanonical, int *nsemicanonical, int *nnoncanonical,
319 			   struct T *pairs, int npairs,
320 			   int cdna_direction, int minpos, int maxpos);
/* Takes an int * matchscores and a pair array.  NOTE(review): the caller
   presumably supplies the buffer; its required length (npairs vs. query
   length) is not visible here -- confirm before sizing it. */
321 extern void
322 Pair_matchscores (int *matchscores, struct T *ptr, int npairs);
323 extern int
324 Pair_maxnegscore (List_T pairs);
325 
326 
/* Takes two pointers (bool *gapp, int *pathscores) ahead of the pair array,
   plus a cDNAEnd_T, which per the include comment at the top of this header
   comes from reader.h.
   NOTE(review): both pointers are presumably caller-allocated per-position
   arrays; their required length is not visible here -- confirm. */
327 extern void
328 Pair_pathscores (bool *gapp, int *pathscores, struct T *ptr, int npairs,
329 		 int cdna_direction, int querylength, cDNAEnd_T cdnaend, int pre_extension_slop);
330 
/* Takes a pair list and returns an int.  NOTE(review): the encoding of the
   direction is not visible here -- confirm. */
331 extern int
332 Pair_cdna_direction (List_T pairs);
/* Exon counts: the list variant is named "approx", the array variant is
   not.  NOTE(review): how the two counts differ is not visible here. */
333 extern int
334 Pair_nexons_approx (List_T pairs);
335 extern int
336 Pair_nexons (struct T *pairs, int npairs);
/* Returns a bool and has an int *ncanonical parameter.
   NOTE(review): presumably reports whether the introns agree with
   cdna_direction and stores a canonical-intron count -- confirm. */
337 extern bool
338 Pair_consistentp (int *ncanonical, struct T *pairs, int npairs, int cdna_direction);
339 
/* Declared only when PMAP is not defined.  Has two Chrpos_T * parameters
   ahead of the pair array.  NOTE(review): presumably stores the genomic
   positions of the first and last pairs -- whether start <= end is
   guaranteed is not visible here; confirm. */
340 #ifndef PMAP
341 extern void
342 Pairarray_chrpos_bounds (Chrpos_T *chrpos_start, Chrpos_T *chrpos_end,
343 			 struct T *pairarray, int npairs);
344 #endif
345 
/* Each takes a pair array and an int overlap and returns a Chrpos_T.
   NOTE(review): presumably the genomic position reached after walking
   "overlap" positions in from the start or the end of the array -- confirm,
   including behaviour when overlap exceeds the alignment. */
346 extern Chrpos_T
347 Pairarray_genomicbound_from_start (struct T *pairarray, int npairs, int overlap);
348 extern Chrpos_T
349 Pairarray_genomicbound_from_end (struct T *pairarray, int npairs, int overlap);
/* Returns a char * and has an int *seqlength parameter.
   NOTE(review): presumably a newly allocated string owned by the caller,
   with its length stored in *seqlength -- confirm the allocator so the
   matching free routine is used. */
350 extern char *
351 Pairarray_genomic_sequence (int *seqlength, struct T *pairarray, int npairs);
352 
353 
/* Each takes a pair list and an int breakpoint, returns a Pair_T and has an
   int *cdna_direction parameter.
   NOTE(review): presumably returns the pair bounding the alignment on one
   side of breakpoint (a query position) and stores a direction -- confirm,
   including whether NULL can be returned. */
354 extern T
355 Pair_start_bound (int *cdna_direction, List_T pairs, int breakpoint);
356 extern T
357 Pair_end_bound (int *cdna_direction, List_T pairs, int breakpoint);
358 
359 
/* Has two int * parameters (trim5, trim3) ahead of the pair list and
   returns nothing.  NOTE(review): presumably computes how much would be
   trimmed from each end without changing the list -- confirm. */
360 extern void
361 Pair_trim_distances (int *trim5, int *trim3, List_T pairs);
362 
/* Returns a List_T and has two bool * parameters (trim5p, trim3p).
   NOTE(review): presumably returns the trimmed list and reports which ends
   were trimmed; whether the input list is reused is not visible here --
   confirm. */
363 extern List_T
364 Pair_trim_ends (bool *trim5p, bool *trim3p, List_T pairs, int ambig_end_length_5, int ambig_end_length_3);
/* Has two List_T * parameters and takes a Pairpool_T.
   NOTE(review): presumably splits pairs at the chromosome length into
   *pairs_below and *pairs_above, allocating new cells from pairpool --
   confirm. */
365 extern void
366 Pair_split_circular (List_T *pairs_below, List_T *pairs_above, List_T pairs,
367 		     Chrpos_T chrlength, Pairpool_T pairpool, bool plusp);
368 
/* Remove the T shorthand for Pair_T defined earlier in this header, so it
   does not leak into files that include it. */
369 #undef T
370 #endif
371