1 /* 2 Copyright (c) 1996,1997,1998,1999,2000,2001,2004,2006 3 Whitehead Institute for Biomedical Research, Steve Rozen 4 (http://jura.wi.mit.edu/rozen), and Helen Skaletsky 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 * Redistributions of source code must retain the above copyright 12 notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above 14 copyright notice, this list of conditions and the following disclaimer 15 in the documentation and/or other materials provided with the 16 distribution. 17 * Neither the names of the copyright holders nor contributors may 18 be used to endorse or promote products derived from this software 19 without specific prior written permission. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef PR_PRIMER_H 35 #define PR_PRIMER_H 1 36 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <stddef.h> 40 #include <string.h> 41 #include "dpal.h" 42 43 #define MAX_PRIMER_LENGTH 36 44 #define PR_INFINITE_POSITION_PENALTY -1.0 45 #define PR_DEFAULT_OUTSIDE_PENALTY 0.0 46 #define PR_DEFAULT_INSIDE_PENALTY PR_INFINITE_POSITION_PENALTY 47 #define PR_DEFAULT_PRODUCT_MAX_TM 1000000.0 48 #define PR_DEFAULT_PRODUCT_MIN_TM -1000000.0 49 #define PR_UNDEFINED_INT_OPT INT_MIN 50 #define PR_UNDEFINED_DBL_OPT DBL_MIN 51 /* Undefined value for alignment score (meaning do not check) used for maximum 52 template mispriming or mishyb. */ 53 #define PR_UNDEFINED_ALIGN_OPT -100 54 55 56 #define PR_POSITION_PENALTY_IS_NULL(PA) \ 57 (PR_DEFAULT_INSIDE_PENALTY == (PA)->inside_penalty \ 58 && PR_DEFAULT_OUTSIDE_PENALTY == (PA)->outside_penalty) 59 60 #define PR_NULL_START_CODON_POS -1000000 61 #define PR_DEFAULT_START_CODON_POS PR_NULL_START_CODON_POS 62 63 #define PR_START_CODON_POS_IS_NULL(SA) \ 64 ((SA)->start_codon_pos <= PR_NULL_START_CODON_POS) 65 66 /* Maxima needed for interface data structures. */ 67 #define PR_MAX_INTERVAL_ARRAY 200 /* 68 * Maximum number of input intervals 69 * supported; used for targets, excluded 70 * regions, product-size intervals, etc. 71 */ 72 73 #define PR_ALIGN_SCORE_PRECISION 100.0 74 #define PR_MAX_LIBRARY_WT 100.0 75 76 #define TRIMMED_SEQ_LEN(X) ((X)->incl_l) 77 78 #define MACRO_STRING(X) #X 79 /* pr_progam_name must be set in main(). */ 80 #define PR_ASSERT(COND) if(COND){} \ 81 82 typedef enum oligo_type { OT_LEFT = 0, OT_RIGHT = 1, OT_INTL = 2 } 83 oligo_type; 84 85 typedef enum oligo_violation { OV_UNINITIALIZED = -1, 86 OV_OK=0, 87 OV_TOO_MANY_NS=1, 88 OV_INTERSECT_TARGET=2, 89 OV_GC_CONTENT=3, 90 OV_TM_LOW=4, 91 OV_TM_HIGH=5, 92 OV_SELF_ANY=6, 93 OV_SELF_END=7, 94 OV_EXCL_REGION=8, 95 OV_GC_CLAMP=9, 96 OV_END_STAB=10, 97 OV_POLY_X=11, 98 OV_SEQ_QUALITY=12, 99 OV_LIB_SIM=13, 100 OV_TEMPLATE_MISPRIMING=14, 101 OV_GMASKED=14 /* edited by T. Koressaar for lowercase masking */ 102 } oligo_violation; 103 104 typedef struct rep_sim { 105 char *name; /* Name of the sequence from given file in fasta 106 * format with maximum similarity to the oligo. 107 */ 108 short min; /* 109 * The minimum score in slot 'score' (below). 110 * (Used when the objective function involves 111 * minimization of mispriming possibilities.) 112 */ 113 short max; /* The maximum score in slot 'score' (below). */ 114 short *score; /* 115 * Array of similarity (i.e. false-priming) scores, 116 * one for each entry in the 'repeat_lib' slot 117 * of the primargs struct. 118 */ 119 } rep_sim; 120 121 typedef struct primrec { 122 123 rep_sim repeat_sim; 124 /* Name of the sequence from given file in fasta 125 * format with maximum similarity to the oligo 126 * and corresponding alignment score. 127 */ 128 129 double temp; /* 130 * The oligo melting temperature calculated for the 131 * primer. 132 */ 133 134 double gc_content; 135 136 double position_penalty; 137 /* 138 * Penalty for distance from "ideal" position as specified 139 * by inside_penalty and outside_penalty. 140 */ 141 142 double quality; /* Part of objective function due to this primer. */ 143 144 double end_stability; 145 /* Delta G of disription of 5 3' bases. */ 146 int start; /* The 0-based index of the leftmost base of the primer 147 WITH RESPECT TO THE seq_args FIELD trimmed_seq. */ 148 int seq_quality; /* Minimum quality score of bases included. */ 149 short self_any; /* Self complementarity as local alignment * 100. */ 150 short self_end; /* Self complementarity at 3' end * 100. */ 151 short template_mispriming; 152 /* Max 3' complementarity to any ectopic site in template 153 on the given template strand. */ 154 short template_mispriming_r; 155 /* Max 3' complementarity to any ectopic site in the 156 template on the reverse complement of the given template 157 strand. */ 158 char target; /* 159 * 0 if this primer does not overlap any target, 1 if it 160 * does. 161 */ 162 char excl; /* 163 * 0 if does not overlap any excluded region, 1 if it 164 * does. 165 */ 166 oligo_violation ok; 167 char length; /* Length of the oligo. */ 168 char num_ns; /* Number of Ns in the oligo. */ 169 char position_penalty_infinite; 170 /* Non-0 if the position penalty is infinite. */ 171 char must_use; /* Non-0 if the oligo must be used even if it is illegal. */ 172 } primer_rec; 173 174 /* 175 * The structure for a pair of primers. (So that we can have a function which 176 * returns a pair of primers.) 177 */ 178 typedef struct primpair { 179 double pair_quality; 180 double compl_measure; /* 181 * A measure of self-complementarity of left and right 182 * primers in the pair, as well as complementarity 183 * between left and right primers. The function 184 * choice returns pairs with the minimal value for 185 * this field when 2 pairs have the same 186 * pair_quality. 187 */ 188 double diff_tm; /* Absolute value of the difference between melting 189 * temperatures for left and right primers. 190 */ 191 192 double product_tm; /* Estimated melting temperature of the product. */ 193 194 double product_tm_oligo_tm_diff; 195 /* Difference in Tm between the primer with lowest Tm 196 the product Tm. */ 197 198 double t_opt_a; 199 200 int compl_any; /* 201 * Local complementarity score between left and right 202 * primers (* 100). 203 */ 204 205 int compl_end; /* 206 * 3'-anchored global complementatory score between * 207 * left and right primers (* 100). 208 */ 209 210 int template_mispriming; 211 /* Maximum total mispriming score of both primers 212 to ectopic sites in the template, on "same" 213 strand (* 100). */ 214 215 short repeat_sim; /* Maximum total similarity of both primers to the 216 * sequence from given file in fasta format. 217 */ 218 primer_rec *left; /* Left primer. */ 219 primer_rec *right; /* Right primer. */ 220 primer_rec *intl; /* Internal oligo. */ 221 222 int product_size; /* Product size. */ 223 int target; /* 224 * 1 if there is a target between the right and left 225 * primers. 226 */ 227 char *rep_name; 228 } primer_pair; 229 230 typedef struct pair_array_t { 231 int storage_size; 232 int num_pairs; 233 primer_pair *pairs; 234 } pair_array_t; 235 236 typedef int interval_array_t[PR_MAX_INTERVAL_ARRAY][2]; 237 238 /* pr_append_str is an append-only string ADT. */ 239 typedef struct pr_append_str { 240 int storage_size; 241 char *data; 242 } pr_append_str; 243 244 /* The seq_lib struct represents a library of sequences. */ 245 typedef struct seq_lib { 246 char **names; /* An array of sequence names. */ 247 char **seqs; /* An array of sequences. */ 248 char **rev_compl_seqs;/* An array of reversed-complemented sequences. 249 x->rev_compl_seqs[i] is the reverse complement 250 of x->seqs[i], which lets us keep track of pairwise 251 mispriming. See reverse_complement_seq_lib(). */ 252 double *weight; /* An array of weights. */ 253 char *repeat_file; /* The path of the file containing the library. */ 254 pr_append_str error; /* Global error message if any. */ 255 pr_append_str warning;/* Warning message. */ 256 int seq_num; /* The number of names, sequences, and weights. */ 257 } seq_lib; 258 259 /* 260 * Arguments to the primer program as a whole. Values for these arguments are 261 * retained _across_ different input records. (These are the so-called 262 * "Global" arguments in the documentation.) 263 */ 264 typedef struct oligo_weights { 265 double temp_gt; 266 double temp_lt; 267 double gc_content_gt; 268 double gc_content_lt; 269 double compl_any; 270 double compl_end; 271 double repeat_sim; 272 double length_lt; 273 double length_gt; 274 double seq_quality; 275 double end_quality; 276 double pos_penalty; 277 double end_stability; 278 double num_ns; 279 double template_mispriming; 280 } oligo_weights; 281 282 typedef struct pair_weights { 283 double primer_quality; 284 double io_quality; 285 double diff_tm; 286 double compl_any; 287 double compl_end; 288 double product_tm_lt; 289 double product_tm_gt; 290 double product_size_lt; 291 double product_size_gt; 292 double repeat_sim; 293 double template_mispriming; 294 } pair_weights; 295 296 typedef enum task { pick_pcr_primers = 0, 297 pick_pcr_primers_and_hyb_probe = 1, 298 pick_left_only = 2, 299 pick_right_only = 3, 300 pick_hyb_probe_only =4, 301 } task; 302 303 typedef struct primargs { 304 int pr_min[PR_MAX_INTERVAL_ARRAY]; /* Minimum product sizes. */ 305 int pr_max[PR_MAX_INTERVAL_ARRAY]; /* Maximum product sizes. */ 306 seq_lib repeat_lib; /* Library of sequences to avoid. */ 307 308 seq_lib io_mishyb_library; 309 310 oligo_weights primer_weights; 311 oligo_weights io_weights; 312 pair_weights pr_pair_weights; 313 314 pr_append_str glob_err; 315 316 double opt_tm; 317 double min_tm; 318 double max_tm; 319 double max_diff_tm; 320 double opt_gc_content; 321 double max_gc; 322 double min_gc; 323 double salt_conc; 324 double divalent_conc; /* added by T.Koressaar, divalent salt concentration mmol/l */ 325 double dntp_conc; /* added by T.Koressaar, for considering divalent salt concentration */ 326 double dna_conc; 327 328 double io_opt_tm; 329 double io_min_tm; 330 double io_max_tm; 331 double io_opt_gc_content; 332 double io_max_gc; 333 double io_min_gc; 334 double io_salt_conc; 335 double io_divalent_conc; /* added by T.Koressaar, divalent salt concentration mmol/l */ 336 double io_dntp_conc; /* added by T.Koressaar, for considering divalent salt concentration */ 337 double io_dna_conc; 338 int tm_santalucia; /* added by T.Koressaar table of thermodynamic parameters of SantaLucia 1998 */ 339 int salt_corrections; /* added by T.Koressaar salt correction formula for Tm calculation */ 340 int lowercase_masking; /* added by T.Koressaar for primer design from lowercase masked template */ 341 342 double outside_penalty; /* Multiply this value times the number of NTs 343 * from the 3' end to the the (unique) target to 344 * get the 'position penalty'. 345 * Meaningless if there are multiple targets 346 * or if the primer cannot be part of a pair 347 * that spans the target. 348 */ 349 350 double inside_penalty; /* Multiply this value times the number of NT 351 * positions by which the primer overlaps 352 * the (unique) target to the 'position penalty'. 353 * Meaningless if there are multiple targets 354 * or if the primer cannot be part of a pair 355 * that spans the target. 356 */ 357 358 double product_max_tm; 359 double product_min_tm; 360 double product_opt_tm; 361 double max_end_stability; 362 /* The maximum value allowed for the delta 363 * G of disruption for the 5 3' bases of 364 * a primer. 365 */ 366 int num_intervals; /* 367 * Number of product size intervals 368 * (i.e. number of elements in pr_min and 369 * pr_max) 370 */ 371 int num_ns_accepted; 372 task primer_task; /* 2 if left primer only, 3 if right primer only, 373 * 4 if internal oligo only. */ 374 375 int file_flag; 376 int explain_flag; 377 int primer_opt_size; 378 int primer_min_size; 379 int primer_max_size; 380 int product_opt_size; 381 382 /*internal oligo*/ 383 int io_num_ns_accepted; 384 int io_primer_opt_size; 385 int io_primer_min_size; 386 int io_primer_max_size; 387 388 int gc_clamp; /* Required number of GCs at *3' end. */ 389 390 391 int liberal_base; /* 392 * If non-0 then turn characters other than 393 * [ATGCNatgcn] into N. 394 */ 395 396 int max_poly_x; /* 397 * Maximum length of mononucleotide sequence in an 398 * oligo. 399 */ 400 int io_max_poly_x; 401 402 403 int first_base_index; /* 404 * The index of the first base in the input 405 * sequence. This parameter is ignored within 406 * pr_choice; pr_choice's caller must assure that 407 * all indexes are 0-based. However, this 408 * parameter should used by output routines to 409 * adjust base indexes. 410 */ 411 int num_return; /* The number of best primer pairs to return. */ 412 int min_quality; /* Minimum quality permitted for oligo sequence.*/ 413 int min_end_quality; /* Minimum quality permitted at 3' end. */ 414 int quality_range_min; 415 int quality_range_max; 416 417 int io_min_quality; 418 int io_min_end_quality; 419 420 int pick_anyway; /* Pick even if input primer or oligos 421 violate constraints. */ 422 423 int lib_ambiguity_codes_consensus; 424 /* If non-0, treat ambiguity codes in a mispriming/mishyb 425 library as representing a consensus. So, for example, 426 S would match C or G. N would match any nucleotide. 427 It turns out that this _not_ what one normally wants, 428 since many libraries contain strings of N, which then 429 match every oligo (very bad). 430 */ 431 432 short max_template_mispriming; 433 short pair_max_template_mispriming; 434 435 short io_max_template_mishyb; 436 437 short repeat_compl; /* 438 * Acceptable complementarity with repeat 439 * sequences. 440 */ 441 short io_repeat_compl; 442 443 short pair_repeat_compl; 444 445 short self_any; 446 short self_end; 447 448 short io_self_any; 449 short io_self_end; 450 451 short pair_compl_any; 452 short pair_compl_end; 453 } primer_args; 454 455 typedef struct pair_stats { 456 int considered; /* Total number of pairs or triples tested. */ 457 int product; /* Pairs providing incorrect product size. */ 458 int target; /* Pairs without any target between primers. */ 459 int temp_diff; /* Melting temperature difference too high. */ 460 int compl_any; /* Pairwise complementarity larger than allowed. */ 461 int compl_end; /* The same for 3' end complementarity. */ 462 int internal; /* Internal oligo was not found. */ 463 int repeat_sim; /* Complementarity with repeat sequence too high.*/ 464 int high_tm; /* Product Tm too high. */ 465 int low_tm; /* Product Tm too low. */ 466 int template_mispriming; /* Sum of template mispriming scores too hihg. */ 467 int ok; /* Number that were ok. */ 468 } pair_stats; 469 470 typedef struct oligo_stats { 471 int considered; /* Total number of tested oligos of given type */ 472 int ns; /* Number of oligos rejected because of Ns */ 473 int target; /* Overlapping targets. */ 474 int excluded; /* Overlapping excluded regions. */ 475 int gc; /* Unacceptable GC content. */ 476 int gc_clamp; /* Don't have required number of GCs at 3' end. */ 477 int temp_min; /* Melting temperature below t_min. */ 478 int temp_max; /* Melting temperature more than t_max. */ 479 int compl_any; /* Self-complementarity too high. */ 480 int compl_end; /* Self-complementarity at 3' end too high. */ 481 int repeat_score; /* Complementarity with repeat sequence too high.*/ 482 int poly_x; /* Long mononucleotide sequence inside. */ 483 int seq_quality; /* Low quality of bases included. */ 484 int stability; /* Stability of 5 3' bases too high. */ 485 int no_orf; /* Would not amplify any of the specified ORF 486 (valid for left primers only). */ 487 int template_mispriming; /* Template mispriming score too high. */ 488 int ok; /* Number of acceptable oligos. */ 489 int gmasked; /* edited by T. Koressaar, number of gmasked oligo*/ 490 } oligo_stats; 491 492 /* 493 * Arguments relating to a particular source sequence for which 494 * we will pick primers. Values for seq_args are _not_ retained 495 * across different input records. 496 */ 497 typedef struct seqargs { 498 pr_append_str error; /* Error messages. */ 499 pr_append_str warning; /* Warning messages. */ 500 int num_targets; /* The number of targets. */ 501 interval_array_t tar; /* 502 * The targets themselves; tar[i][0] is the start 503 * of the ith target, tar[i][1] its length. These 504 * are presented as indexes within the sequence 505 * slot, but during the execution of choice() they 506 * are recalculated to be indexes within 507 * trimmed_seq. 508 */ 509 int num_excl; /* The number of excluded regions. */ 510 interval_array_t excl; /* The same as for targets. 511 * These are presented as indexes within 512 * the sequence slot, but during the 513 * execution of choice() they are recalculated 514 * to be indexes within trimmed_seq. 515 */ 516 int num_internal_excl; /* Number of excluded regions for internal oligo.*/ 517 interval_array_t excl_internal; 518 /* Similar to excl. */ 519 int incl_s; /* The 0-based start of included region. */ 520 int incl_l; /* 521 * The length of the included region, which is 522 * also the length of the trimmed_seq field. 523 */ 524 int start_codon_pos; /* Index of first base of the start codon. */ 525 int stop_codon_pos; /* 526 * An optional _output_, meaninful if a 527 * start_codon_pos is "not nul". The position of 528 * the intial base of the leftmost stop codon that 529 * is to the right of sa->start_codon_pos. 530 */ 531 const int *quality; /* Vector of quality scores. */ 532 const char *sequence; /* The template sequence itself as input, 533 not trimmed, not up-cased. */ 534 const char *sequence_name; /* An identifier for the sequence. */ 535 char *sequence_file; /* Another identifer for the sequence. */ 536 char *trimmed_seq; /* The included region only, _UPCASED_. */ 537 538 /* Element add by T. Koressaar support lowercase masking: */ 539 char *trimmed_orig_seq; /* Trimmed version of the original, 540 mixed-case sequence. */ 541 542 char *upcased_seq; /* Upper case version of sequence 543 (_not_ trimmed). */ 544 char *upcased_seq_r; /* Upper case version of sequence, 545 other strand (_not_ trimmed). */ 546 const char *left_input; /* A left primer to check or design around. */ 547 const char *right_input; /* A right primer to check or design around. */ 548 const char *internal_input; /* An internal oligo to check or design around. */ 549 550 oligo_stats left_expl; /* Left primers statistics. */ 551 oligo_stats right_expl; /* Right primers statistics. */ 552 oligo_stats intl_expl; /* Internal oligos statistics. */ 553 pair_stats pair_expl; /* Pair statistics. */ 554 } seq_args; 555 556 typedef struct Primer3Context_ { 557 primer_rec * f; 558 primer_rec * r; 559 primer_rec * mid; 560 int f_len; 561 int r_len; 562 int mid_len; 563 564 dpal_args * lib_local_dpal_args; 565 dpal_args * lib_local_end_dpal_args; 566 }Primer3Context; 567 568 569 /* 570 * pr_program_name and pr_program_name_len must be set in the client's 571 * main(). 572 */ 573 extern const char *pr_program_name; 574 575 extern int pr_program_name_len; 576 577 #ifdef __cplusplus 578 extern "C" { /* } */ 579 #endif 580 /* Assign default values to global (as opposed to per-sequence) primer picking 581 parameters. */ 582 void pr_set_default_global_args(primer_args *); 583 584 /* 585 * Return the ('\0'-terminated) sequence of oligo o in static storage that is 586 * overwritten on each call. 587 */ 588 char *pr_oligo_sequence(const seq_args *, const primer_rec *o); 589 590 /* 591 * Return the ('\0'-terminated) reverse-complemented sequence of oligo o in 592 * static storage that is overwritten on each call. 593 */ 594 char *pr_oligo_rev_c_sequence(const seq_args *, const primer_rec *o); 595 596 void *pr_safe_malloc(size_t); /* A fail-stop wrapper for malloc. */ 597 598 void *pr_safe_realloc(void *, size_t); 599 600 /* Append s to x. */ 601 void pr_append(pr_append_str *, const char *); 602 603 /* Append s to x if x is empty. Otherwise append concat("; ",s) to x. */ 604 void pr_append_new_chunk(pr_append_str *x, const char *s); 605 606 /* Append s to x if x is empty. Otherwise append concat(sep, s) to x. */ 607 void pr_append_w_sep(pr_append_str *x, const char *sep, const char *); 608 609 /* Set the argument to the empty string. */ 610 void pr_set_empty(pr_append_str *); 611 612 /* Return true iff the argument is empty. */ 613 int pr_is_empty(const pr_append_str *); 614 615 #ifdef __cplusplus 616 /* { */ } 617 #endif 618 619 /* 620 * Gather all warnings and return a char * containing them. The return value 621 * points to storage that the caller must free. Return NULL if there are no 622 * errors. 623 */ 624 char *pr_gather_warnings(const seq_args *, const primer_args *); 625 626 /* Choose primer pairs or triples */ 627 void pr_choice(primer_args *, seq_args *, const dpal_args *, 628 const dpal_args *, const dpal_args *, pair_array_t *, 629 int *, int *, int *, int * cancel, int * progress, Primer3Context * ctx); 630 631 /* Print an "explanation string" for primer pair search. */ 632 void pr_print_pair_explain(FILE *, const seq_args *); 633 634 int strcmp_nocase(const char *,const char *); 635 636 /* The following are _temporarily_ exposed symbols. */ 637 int _pr_data_control(primer_args *, seq_args *); 638 int _pr_need_template_mispriming(const primer_args *); 639 int _pr_need_pair_template_mispriming(const primer_args *); 640 void _pr_reverse_complement(const char *, char *); 641 void _pr_substr(const char *, int, int, char *); 642 #define _PR_DEFAULT_POSITION_PENALTIES(PA) \ 643 (PR_DEFAULT_INSIDE_PENALTY == pa->inside_penalty \ 644 && PR_DEFAULT_OUTSIDE_PENALTY == pa->outside_penalty) 645 #endif 646