1 /* $Id: pair.h 222194 2020-03-23 13:44:44Z twu $ */ 2 #ifndef PAIR_INCLUDED 3 #define PAIR_INCLUDED 4 5 typedef struct Pair_T *Pair_T; 6 7 #include "bool.h" 8 #include "genomicpos.h" 9 #include "chrnum.h" 10 #include "list.h" 11 #include "iit-read-univ.h" 12 #include "iit-read.h" 13 #include "sequence.h" 14 #include "reader.h" /* For cDNAEnd_T */ 15 #include "uintlist.h" 16 #include "genome.h" 17 #include "chimera.h" 18 #include "filestring.h" 19 #include "pairpool.h" 20 21 22 #define MATCHESPERGAP 3 23 24 typedef enum {CDS_CDNA, CDS_GENOMIC} CDStype_T; 25 typedef enum {CIGAR_ACTION_IGNORE, CIGAR_ACTION_WARNING, CIGAR_ACTION_NOPRINT, CIGAR_ACTION_ABORT} Cigar_action_T; 26 27 28 #define T Pair_T 29 30 extern void 31 Pair_setup (bool novelsplicingp_in, IIT_T splicesites_iit_in, int trim_indel_score_in, 32 bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in, 33 bool md_lowercase_variant_p_in, bool snps_p_in, 34 bool gff3_phase_swap_p_in, CDStype_T cdstype_in, 35 bool cigar_extended_p_in, Cigar_action_T cigar_action_in); 36 extern int 37 Pair_querypos (T this); 38 extern Chrpos_T 39 Pair_genomepos (T this); 40 extern char 41 Pair_cdna (T this); 42 extern char 43 Pair_comp (T this); 44 extern char 45 Pair_genome (T this); 46 extern char 47 Pair_genomealt (T this); 48 extern bool 49 Pair_gapp (T this); 50 extern bool 51 Pair_shortexonp (T this); 52 extern void 53 Pair_print_ends (List_T pairs); 54 55 extern void 56 Pair_set_genomepos (struct Pair_T *pairarray, int npairs, Univcoord_T chroffset, 57 Univcoord_T chrhigh, bool watsonp); 58 extern void 59 Pair_subtract_genomepos (struct T *pairs, int npairs, Chrpos_T adjustment); 60 61 #if 0 62 extern void 63 Pair_set_genomepos_list (List_T pairs, Univcoord_T chroffset, Univcoord_T chrhigh, 64 bool watsonp); 65 #endif 66 extern List_T 67 Pair_clip_bounded_list_5 (List_T source, int minpos, int maxpos); 68 extern List_T 69 Pair_clip_bounded_list_3 (List_T source, int minpos, int maxpos); 70 extern int 71 Pair_clip_bounded_array (struct T *source, int npairs, int minpos, int maxpos); 72 73 extern List_T 74 Pair_protect_end5 (List_T pairs); 75 extern List_T 76 Pair_protect_end3 (List_T pairs); 77 extern void 78 Pair_protect_list (List_T pairs); 79 80 extern T 81 Pair_new_out (int querypos, Chrpos_T genomepos, char cdna, char comp, char genome); 82 extern void 83 Pair_free_out (T *old); 84 85 extern int 86 Pair_translation_length (struct T *pairs, int npairs); 87 extern void 88 Pair_print_continuous (Filestring_T fp, struct T *pairs, int npairs, bool watsonp, 89 bool genomefirstp, int invertmode, bool nointronlenp); 90 91 extern void 92 Pair_print_continuous_byexon (Filestring_T fp, struct T *pairs, int npairs, bool watsonp, int invertmode); 93 extern void 94 Pair_print_alignment (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum, 95 Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool watsonp, 96 int invertmode, bool nointronlenp, int wraplength); 97 98 extern void 99 Pair_print_pathsummary (Filestring_T fp, int pathnum, T start, T end, Chrnum_T chrnum, 100 Univcoord_T chroffset, Univ_IIT_T chromosome_iit, bool referencealignp, 101 IIT_T altstrain_iit, char *strain, Univ_IIT_T contig_iit, char *dbversion, 102 int querylength_given, int skiplength, int trim_start, int trim_end, 103 int nexons, int matches, int unknowns, int mismatches, 104 int qopens, int qindels, int topens, int tindels, 105 bool watsonp, int cdna_direction, 106 int translation_start, int translation_end, int translation_length, 107 int relaastart, int relaaend); 108 109 extern void 110 Pair_print_coordinates (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum, 111 Univcoord_T chroffset, Univ_IIT_T chromosome_iit, 112 bool watsonp, int invertmode); 113 114 extern int 115 Pair_cmp (const void *a, const void *b); 116 117 extern void 118 Pair_dump_one (T this, bool zerobasedp); 119 extern void 120 Pair_dump_list (List_T pairs, bool zerobasedp); 121 extern void 122 Pair_dump_array (struct T *pairs, int npairs, bool zerobasedp); 123 extern void 124 Pair_dump_array_stderr (struct T *pairs, int npairs, bool zerobasedp); 125 extern void 126 Pair_dump_genome_array (struct T *pairs, int npairs); 127 extern void 128 Pair_dump_comp_array (struct T *pairs, int npairs); 129 extern Chrpos_T 130 Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp); 131 extern int 132 Pair_codon_changepos (struct T *pairs, int npairs, int aapos, int cdna_direction); 133 134 extern bool 135 Pair_identical_p (List_T pairs1, List_T pairs2); 136 extern void 137 Pair_check_list_pairs (List_T pairs); 138 extern void 139 Pair_check_list_path (List_T path); 140 extern bool 141 Pair_check_array_pairs (struct T *pairs, int npairs); 142 extern bool 143 Pair_check_array_path (struct T *path, int npairs); 144 145 extern void 146 Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum, 147 Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit, 148 bool watsonp, int cdna_direction, bool genomefirstp, int invertmode); 149 150 extern int 151 Pair_cigar_length (List_T tokens); 152 extern void 153 Pair_print_tokens (Filestring_T fp, List_T tokens); 154 extern void 155 Pair_tokens_free (List_T *tokens); 156 extern List_T 157 Pair_tokens_copy (List_T old); 158 159 extern void 160 Pair_print_gff3 (Filestring_T fp, struct T *pairs, int npairs, int pathnum, char *accession, char *restofheader, 161 T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment, 162 int translation_end, 163 int querylength_given, int skiplength, int matches, int mismatches, 164 int qindels, int tindels, int unknowns, bool watsonp, int cdna_direction, 165 bool gff_gene_format_p, bool gff_estmatch_format_p, char *sourcename); 166 167 #ifdef GSNAP 168 extern void 169 Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp, 170 Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq, 171 char *acc_suffix, Univ_IIT_T chromosome_iit); 172 #endif 173 174 #ifndef PMAP 175 extern void 176 Pair_print_bedpe (Filestring_T fp, struct T *pairs_querydir, int npairs, 177 Chrnum_T chrnum, bool watsonp, Univ_IIT_T chromosome_iit); 178 #endif 179 180 extern void 181 Pair_fix_cdna_direction_array (struct T *pairs_querydir, int npairs, int cdna_direction); 182 extern int 183 Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int npairs, bool invertedp, 184 Univcoord_T chroffset, bool watsonp); 185 extern int 186 Pair_guess_cdna_direction (int *sensedir, List_T pairs, bool invertedp, 187 Univcoord_T chroffset, bool watsonp); 188 extern int 189 Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons, 190 int *nindelbreaks, struct T *pairs, int npairs, int querylength); 191 extern int 192 Pair_tokens_cigarlength (List_T tokens); 193 194 195 extern int 196 Pair_circularpos (int *alias, struct T *pairs, int npairs, Chrpos_T chrlength, bool plusp, int querylength); 197 extern void 198 Pair_alias_circular (struct T *pairs, int npairs, Chrpos_T chrlength); 199 extern void 200 Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength); 201 202 extern void 203 Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr, 204 char *quality_string, int querylength, int quality_shift, 205 bool first_read_p, bool sam_paired_p, char *sam_read_group_id); 206 207 extern struct T * 208 Pair_hardclip (int *clipped_npairs, int hardclip_start, int hardclip_end, 209 struct T *pairs, int npairs, int querylength); 210 211 extern List_T 212 Pair_clean_cigar (List_T tokens, bool watsonp); 213 extern List_T 214 Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given, 215 bool watsonp, int chimera_part); 216 217 extern void 218 Pair_print_sam (Filestring_T fp, char *abbrev, struct Pair_T *pairarray, int npairs, 219 char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment, 220 char *queryseq_ptr, char *quality_string, 221 int hardclip_low, int hardclip_high, int querylength_given, 222 bool watsonp, int sensedir, int chimera_part, Chimera_T chimera, 223 int quality_shift, bool first_read_p, int pathnum, int npaths_primary, int npaths_altloc, 224 int absmq_score, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength, 225 int mapq_score, bool sam_paired_p, char *sam_read_group_id); 226 227 extern List_T 228 Pair_compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nindels, 229 struct T *pairs, int npairs, bool watsonp, List_T cigar_tokens); 230 231 extern Uintlist_T 232 Pair_exonbounds (struct T *pairs, int npairs); 233 234 extern void 235 Pair_print_pslformat_nt (Filestring_T fp, struct T *pairs, int npairs, T start, T end, 236 Sequence_T queryseq, Chrnum_T chrnum, 237 Univ_IIT_T chromosome_iit, Sequence_T usersegment, 238 int matches, int unknowns, int mismatches, 239 bool watsonp); 240 241 242 extern void 243 Pair_print_pslformat_pro (Filestring_T fp, struct T *pairs, int npairs, T start, T end, 244 Sequence_T queryseq, Chrnum_T chrnum, 245 Univ_IIT_T chromosome_iit, Sequence_T usersegment, 246 bool watsonp, int cdna_direction); 247 248 extern void 249 Pair_print_exons (Filestring_T fp, struct T *pairs, int npairs, int wraplength, int ngap, bool cdnap); 250 251 extern void 252 Pair_print_protein_genomic (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp); 253 #ifdef PMAP 254 extern void 255 Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength); 256 #else 257 extern void 258 Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp); 259 #endif 260 261 extern void 262 Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion, 263 Sequence_T usersegment, int nexons, double fracidentity, 264 struct T *pairs, int npairs, Chrnum_T chrnum, 265 Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given, 266 int skiplength, int trim_start, int trim_end, bool checksump, 267 int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob, 268 int chimera_cdna_direction, char *strain, int cdna_direction); 269 270 extern void 271 Pair_print_iit_map (Filestring_T fp, Sequence_T queryseq, char *accession, 272 T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit); 273 extern void 274 Pair_print_iit_exon_map (Filestring_T fp, struct T *pairs, int npairs, Sequence_T queryseq, char *accession, 275 T start, T end, Chrnum_T chrnum, Univ_IIT_T chromosome_iit); 276 extern void 277 Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *accession, 278 int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, bool watsonp); 279 extern void 280 Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession, 281 int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit); 282 extern void 283 Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs, 284 Chrpos_T chrlength, int wraplength, bool include_utr_p); 285 286 287 extern int 288 Pair_nmatches_posttrim (int *max_match_length, List_T pairs, int pos5, int pos3); 289 extern int 290 Pair_array_nmatches_posttrim (struct T *pairs, int npairs, int pos5, int pos3); 291 extern int 292 Pair_nmismatches_region (int *nindelbreaks, int *nbadintrons, struct T *pairs, int npairs, 293 int trim_left, int trim_right, int start_amb_nmatches, int end_amb_nmatches, 294 int querylength); 295 296 extern int 297 Pair_goodness_simple (List_T pairs); 298 extern void 299 Pair_fracidentity_simple (int *matches, int *unknowns, int *mismatches, List_T pairs); 300 extern void 301 Pair_fracidentity (int *matches, int *unknowns, int *mismatches, 302 int *qopens, int *qindels, int *topens, int *tindels, 303 int *ncanonical, int *nsemicanonical, int *nnoncanonical, 304 double *min_splice_prob, List_T pairs, int cdna_direction); 305 extern int 306 Pair_fracidentity_array (int *matches, int *unknowns, int *mismatches, int *qopens, int *qindels, 307 int *topens, int *tindels, int *ncanonical, int *nsemicanonical, int *nnoncanonical, 308 double *min_splice_prob, struct T *ptr, int npairs, int cdna_direction); 309 extern int 310 Pair_fracidentity_score (List_T pairs); 311 312 extern double 313 Pair_frac_error (List_T pairs, int cdna_direction); 314 315 extern void 316 Pair_fracidentity_bounded (int *matches, int *unknowns, int *mismatches, 317 int *qopens, int *qindels, int *topens, int *tindels, 318 int *ncanonical, int *nsemicanonical, int *nnoncanonical, 319 struct T *pairs, int npairs, 320 int cdna_direction, int minpos, int maxpos); 321 extern void 322 Pair_matchscores (int *matchscores, struct T *ptr, int npairs); 323 extern int 324 Pair_maxnegscore (List_T pairs); 325 326 327 extern void 328 Pair_pathscores (bool *gapp, int *pathscores, struct T *ptr, int npairs, 329 int cdna_direction, int querylength, cDNAEnd_T cdnaend, int pre_extension_slop); 330 331 extern int 332 Pair_cdna_direction (List_T pairs); 333 extern int 334 Pair_nexons_approx (List_T pairs); 335 extern int 336 Pair_nexons (struct T *pairs, int npairs); 337 extern bool 338 Pair_consistentp (int *ncanonical, struct T *pairs, int npairs, int cdna_direction); 339 340 #ifndef PMAP 341 extern void 342 Pairarray_chrpos_bounds (Chrpos_T *chrpos_start, Chrpos_T *chrpos_end, 343 struct T *pairarray, int npairs); 344 #endif 345 346 extern Chrpos_T 347 Pairarray_genomicbound_from_start (struct T *pairarray, int npairs, int overlap); 348 extern Chrpos_T 349 Pairarray_genomicbound_from_end (struct T *pairarray, int npairs, int overlap); 350 extern char * 351 Pairarray_genomic_sequence (int *seqlength, struct T *pairarray, int npairs); 352 353 354 extern T 355 Pair_start_bound (int *cdna_direction, List_T pairs, int breakpoint); 356 extern T 357 Pair_end_bound (int *cdna_direction, List_T pairs, int breakpoint); 358 359 360 extern void 361 Pair_trim_distances (int *trim5, int *trim3, List_T pairs); 362 363 extern List_T 364 Pair_trim_ends (bool *trim5p, bool *trim3p, List_T pairs, int ambig_end_length_5, int ambig_end_length_3); 365 extern void 366 Pair_split_circular (List_T *pairs_below, List_T *pairs_above, List_T pairs, 367 Chrpos_T chrlength, Pairpool_T pairpool, bool plusp); 368 369 #undef T 370 #endif 371