/*
    Shared set-up for sequence alignment.

    Populates the associative array `alignOptions`:
      - the residue alphabet (SEQ_ALIGN_CHARACTER_MAP),
      - a substitution score matrix chosen interactively (SEQ_ALIGN_SCORE_MATRIX),
      - gap open / extend costs and the affine flag,
      - the prefix/suffix indel setting (SEQ_ALIGN_NO_TP).

    Every 24x24 `scoreMatrix` literal below uses the same row and column order:

        A R N D C Q E G H I L K M F P S T W Y V B Z X *

    Only the leading 20x20 block (A..V, the residues listed in
    SEQ_ALIGN_CHARACTER_MAP) is stored in `alignOptions`.

    `refSeq` is used as the result variable of both prompts, and a negative
    result ends the script with `return 0`.
*/

alignOptions = {};

alignOptions ["SEQ_ALIGN_CHARACTER_MAP"] = "ARNDCQEGHILKMFPSTWYV";

/* Prompt 1: which substitution matrix to score with. */
ChoiceList (refSeq, "Scoring Matrix", 1, SKIP_NONE,
            "BLOSUM62", "Default BLAST BLOSUM62 matrix",
            "HIV 5%",   "Empirically derived 5% divergence HIV matrix",
            "HIV 25%",  "Empirically derived 25% divergence HIV matrix",
            "HIV 50%",  "Empirically derived 50% divergence HIV matrix");

/* Negative selection (presumably a cancelled prompt): stop here. */
if (refSeq < 0) {
    return 0;
}

if (refSeq == 0) {
    /*
        BLOSUM62
        # Divergence level: 0.38
        # Matrix from: TeraMonkey:Users:sergei:hyphy:trunk:TemplateBatchFiles:TemplateModels:EmpiricalAA:BLOSUM62
        # Expected score: -0.548341 Entropy: 4.88752 bits
    */
    scoreMatrix = {
        /* A */ {6, -3, -4, -4, -2, -2, -2, -1, -3, -3, -3, -2, -2, -4, -2, 0, -1, -5, -3, -1, -4, -2, -2, -7}
        /* R */ {-3, 8, -2, -4, -6, 0, -2, -5, -2, -6, -4, 1, -3, -5, -4, -2, -3, -5, -3, -5, -3, -1, -2, -7}
        /* N */ {-4, -2, 8, 0, -5, -1, -2, -2, 0, -6, -6, -1, -4, -5, -4, 0, -1, -7, -4, -5, 6, -2, -2, -7}
        /* D */ {-4, -4, 0, 8, -6, -2, 0, -3, -3, -5, -6, -2, -6, -6, -3, -1, -3, -7, -6, -6, 6, 0, -3, -7}
        /* C */ {-2, -6, -5, -6, 10, -5, -7, -5, -5, -3, -3, -6, -3, -5, -5, -2, -2, -4, -4, -2, -5, -6, -4, -7}
        /* Q */ {-2, 0, -1, -2, -5, 8, 1, -4, 0, -6, -4, 0, -1, -6, -3, -1, -2, -3, -3, -4, -1, 6, -2, -7}
        /* E */ {-2, -2, -2, 0, -7, 1, 7, -4, -1, -6, -5, 0, -4, -6, -3, -1, -2, -5, -4, -4, 0, 6, -2, -7}
        /* G */ {-1, -5, -2, -3, -5, -4, -4, 7, -4, -7, -6, -3, -5, -5, -4, -2, -4, -4, -5, -6, -2, -4, -4, -7}
        /* H */ {-3, -2, 0, -3, -5, 0, -1, -4, 10, -6, -5, -2, -3, -3, -4, -2, -4, -5, 0, -6, -1, -1, -3, -7}
        /* I */ {-3, -6, -6, -5, -3, -6, -6, -7, -6, 6, 0, -5, 0, -1, -5, -5, -2, -5, -3, 2, -5, -6, -2, -7}
        /* L */ {-3, -4, -6, -6, -3, -4, -5, -6, -5, 0, 6, -5, 1, -1, -5, -5, -3, -3, -3, 0, -6, -5, -2, -7}
        /* K */ {-2, 1, -1, -2, -6, 0, 0, -3, -2, -5, -5, 7, -3, -6, -2, -1, -2, -5, -3, -4, -2, 0, -2, -7}
        /* M */ {-2, -3, -4, -6, -3, -1, -4, -5, -3, 0, 1, -3, 9, -1, -5, -3, -2, -3, -3, 0, -5, -2, -1, -7}
        /* F */ {-4, -5, -5, -6, -5, -6, -6, -5, -3, -1, -1, -6, -1, 8, -6, -4, -4, 0, 1, -3, -6, -6, -3, -7}
        /* P */ {-2, -4, -4, -3, -5, -3, -3, -4, -4, -5, -5, -2, -5, -6, 9, -2, -3, -6, -5, -4, -4, -3, -4, -7}
        /* S */ {0, -2, 0, -1, -2, -1, -1, -2, -2, -5, -5, -1, -3, -4, -2, 7, 0, -5, -3, -4, -1, -1, -2, -7}
        /* T */ {-1, -3, -1, -3, -2, -2, -2, -4, -4, -2, -3, -2, -2, -4, -3, 0, 7, -4, -3, -1, -2, -2, -2, -7}
        /* W */ {-5, -5, -7, -7, -4, -3, -5, -4, -5, -5, -3, -5, -3, 0, -6, -5, -4, 12, 0, -6, -7, -4, -4, -7}
        /* Y */ {-3, -3, -4, -6, -4, -3, -4, -5, 0, -3, -3, -3, -3, 1, -5, -3, -3, 0, 9, -3, -5, -3, -2, -7}
        /* V */ {-1, -5, -5, -6, -2, -4, -4, -6, -6, 2, 0, -4, 0, -3, -4, -4, -1, -6, -3, 6, -6, -4, -2, -7}
        /* B */ {-4, -3, 6, 6, -5, -1, 0, -2, -1, -5, -6, -2, -5, -6, -4, -1, -2, -7, -5, -6, 7, -1, -3, -7}
        /* Z */ {-2, -1, -2, 0, -6, 6, 6, -4, -1, -6, -5, 0, -2, -6, -3, -1, -2, -4, -3, -4, -1, 7, -2, -7}
        /* X */ {-2, -2, -2, -3, -4, -2, -2, -4, -3, -2, -2, -2, -1, -3, -4, -2, -2, -4, -2, -2, -3, -2, -2, -7}
        /* * */ {-7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, 1}
    };
}

if (refSeq == 1) {
    /* HIV-1 between 5% */
    scoreMatrix = {
        /* A */ {8, -13, -16, -8, -15, -17, -8, -7, -16, -14, -14, -17, -16, -20, -7, -7, -2, -23, -23, -4, -10, -10, -8, -24}
        /* R */ {-13, 8, -12, -20, -12, -6, -15, -6, -3, -11, -11, -1, -7, -21, -9, -6, -7, -10, -18, -16, -14, -8, -7, -24}
        /* N */ {-16, -12, 9, -1, -15, -11, -14, -12, -4, -11, -20, -4, -19, -20, -17, -2, -4, -24, -8, -18, 7, -12, -7, -24}
        /* D */ {-8, -20, -1, 9, -22, -19, -3, -7, -8, -19, -22, -15, -22, -23, -19, -11, -13, -23, -11, -9, 7, -4, -7, -24}
        /* C */ {-15, -12, -15, -22, 11, -23, -23, -10, -13, -19, -15, -21, -22, -3, -20, -5, -10, -7, -4, -12, -17, -23, -9, -24}
        /* Q */ {-17, -6, -11, -19, -23, 8, -7, -17, -4, -20, -8, -4, -13, -20, -5, -15, -13, -20, -15, -20, -12, 6, -8, -24}
        /* E */ {-8, -15, -14, -3, -23, -7, 8, -6, -15, -20, -23, -5, -14, -24, -20, -19, -13, -23, -17, -9, -5, 6, -9, -24}
        /* G */ {-7, -6, -12, -7, -10, -17, -6, 7, -19, -20, -22, -11, -21, -13, -20, -5, -12, -9, -22, -10, -8, -7, -9, -24}
        /* H */ {-16, -3, -4, -8, -13, -4, -15, -19, 11, -16, -8, -14, -19, -13, -7, -12, -10, -17, -1, -22, -5, -6, -7, -24}
        /* I */ {-14, -11, -11, -19, -19, -20, -20, -20, -16, 7, -4, -13, -3, -6, -17, -9, -3, -22, -15, -1, -12, -20, -7, -24}
        /* L */ {-14, -11, -20, -22, -15, -8, -23, -22, -8, -4, 7, -16, -5, -3, -7, -10, -16, -10, -15, -8, -21, -11, -9, -24}
        /* K */ {-17, -1, -4, -15, -21, -4, -5, -11, -14, -13, -16, 8, -9, -19, -17, -11, -5, -20, -22, -13, -6, -5, -6, -24}
        /* M */ {-16, -7, -19, -22, -22, -13, -14, -21, -19, -3, -5, -9, 11, -13, -20, -18, -5, -16, -23, -4, -20, -14, -7, -24}
        /* F */ {-20, -21, -20, -23, -3, -20, -24, -13, -13, -6, -3, -19, -13, 10, -19, -10, -18, -10, -2, -10, -21, -22, -8, -24}
        /* P */ {-7, -9, -17, -19, -20, -5, -20, -20, -7, -17, -7, -17, -20, -19, 9, -5, -8, -18, -18, -20, -18, -8, -9, -24}
        /* S */ {-7, -6, -2, -11, -5, -15, -19, -5, -12, -9, -10, -11, -18, -10, -5, 8, -3, -19, -11, -16, -4, -17, -7, -24}
        /* T */ {-2, -7, -4, -13, -10, -13, -13, -12, -10, -3, -16, -5, -5, -18, -8, -3, 8, -23, -16, -10, -6, -13, -6, -24}
        /* W */ {-23, -10, -24, -23, -7, -20, -23, -9, -17, -22, -10, -20, -16, -10, -18, -19, -23, 10, -9, -23, -23, -21, -12, -24}
        /* Y */ {-23, -18, -8, -11, -4, -15, -17, -22, -1, -15, -15, -22, -23, -2, -18, -11, -16, -9, 10, -18, -9, -16, -9, -24}
        /* V */ {-4, -16, -18, -9, -12, -20, -9, -10, -22, -1, -8, -13, -4, -10, -20, -16, -10, -23, -18, 8, -11, -11, -7, -24}
        /* B */ {-10, -14, 7, 7, -17, -12, -5, -8, -5, -12, -21, -6, -20, -21, -18, -4, -6, -23, -9, -11, 8, -6, -8, -24}
        /* Z */ {-10, -8, -12, -4, -23, 6, 6, -7, -6, -20, -11, -5, -14, -22, -8, -17, -13, -21, -16, -11, -6, 7, -9, -24}
        /* X */ {-8, -7, -7, -7, -9, -8, -9, -9, -7, -7, -9, -6, -7, -8, -9, -7, -6, -12, -9, -7, -8, -9, -8, -24}
        /* * */ {-24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, 1}
    };
}

if (refSeq == 2) {
    /*
        HIV-1 between 25%
        # Divergence level: 0.25
        # Matrix from: TeraMonkey:Users:sergei:hyphy:trunk:TemplateBatchFiles:TemplateModels:EmpiricalAA:HIVBetween
        # Expected score: -1.40621 Entropy: 5.90658 bits
    */
    scoreMatrix = {
        /* A */ {7, -7, -7, -4, -10, -11, -4, -3, -10, -6, -9, -9, -7, -13, -3, -2, 1, -16, -15, 0, -5, -5, -3, -17}
        /* R */ {-7, 7, -5, -11, -8, -2, -7, -2, 0, -6, -6, 2, -3, -12, -4, -2, -2, -5, -9, -10, -7, -3, -3, -17}
        /* N */ {-7, -5, 8, 2, -9, -6, -6, -7, 0, -6, -12, 0, -10, -12, -9, 1, 0, -17, -3, -10, 6, -6, -3, -17}
        /* D */ {-4, -11, 2, 8, -14, -10, 0, -2, -3, -11, -15, -7, -13, -15, -13, -5, -6, -16, -6, -5, 7, 0, -3, -17}
        /* C */ {-10, -8, -9, -14, 11, -16, -15, -5, -7, -11, -9, -13, -14, 0, -12, -1, -6, -2, 0, -8, -10, -16, -5, -17}
        /* Q */ {-11, -2, -6, -10, -16, 8, -2, -10, 0, -12, -4, 0, -8, -12, -1, -9, -8, -14, -9, -13, -7, 6, -4, -17}
        /* E */ {-4, -7, -6, 0, -15, -2, 7, -1, -9, -12, -15, -1, -10, -17, -13, -11, -8, -15, -12, -5, 0, 6, -4, -17}
        /* G */ {-3, -2, -7, -2, -5, -10, -1, 7, -10, -11, -14, -6, -12, -9, -11, -1, -7, -5, -14, -5, -4, -3, -4, -17}
        /* H */ {-10, 0, 0, -3, -7, 0, -9, -10, 10, -10, -4, -5, -10, -6, -3, -6, -6, -11, 2, -14, -1, -2, -3, -17}
        /* I */ {-6, -6, -6, -11, -11, -12, -12, -11, -10, 7, 0, -7, 0, -2, -10, -4, 0, -14, -9, 2, -7, -12, -2, -17}
        /* L */ {-9, -6, -12, -15, -9, -4, -15, -14, -4, 0, 6, -10, 0, 0, -3, -5, -8, -6, -8, -4, -13, -6, -4, -17}
        /* K */ {-9, 2, 0, -7, -13, 0, -1, -6, -5, -7, -10, 7, -4, -14, -9, -5, -1, -12, -13, -9, -1, -1, -2, -17}
        /* M */ {-7, -3, -10, -13, -14, -8, -10, -12, -10, 0, 0, -4, 10, -7, -11, -9, -1, -11, -15, 0, -11, -9, -3, -17}
        /* F */ {-13, -12, -12, -15, 0, -12, -17, -9, -6, -2, 0, -14, -7, 10, -11, -5, -10, -5, 1, -5, -13, -14, -3, -17}
        /* P */ {-3, -4, -9, -13, -12, -1, -13, -11, -3, -10, -3, -9, -11, -11, 8, -1, -3, -13, -11, -12, -10, -3, -5, -17}
        /* S */ {-2, -2, 1, -5, -1, -9, -11, -1, -6, -4, -5, -5, -9, -5, -1, 8, 0, -12, -6, -9, 0, -10, -3, -17}
        /* T */ {1, -2, 0, -6, -6, -8, -8, -7, -6, 0, -8, -1, -1, -10, -3, 0, 7, -16, -10, -4, -2, -8, -2, -17}
        /* W */ {-16, -5, -17, -16, -2, -14, -15, -5, -11, -14, -6, -12, -11, -5, -13, -12, -16, 10, -4, -16, -16, -14, -8, -17}
        /* Y */ {-15, -9, -3, -6, 0, -9, -12, -14, 2, -9, -8, -13, -15, 1, -11, -6, -10, -4, 10, -12, -4, -10, -4, -17}
        /* V */ {0, -10, -10, -5, -8, -13, -5, -5, -14, 2, -4, -9, 0, -5, -12, -9, -4, -16, -12, 7, -7, -7, -3, -17}
        /* B */ {-5, -7, 6, 7, -10, -7, 0, -4, -1, -7, -13, -1, -11, -13, -10, 0, -2, -16, -4, -7, 7, -2, -4, -17}
        /* Z */ {-5, -3, -6, 0, -16, 6, 6, -3, -2, -12, -6, -1, -9, -14, -3, -10, -8, -14, -10, -7, -2, 6, -4, -17}
        /* X */ {-3, -3, -3, -3, -5, -4, -4, -4, -3, -2, -4, -2, -3, -3, -5, -3, -2, -8, -4, -3, -4, -4, -3, -17}
        /* * */ {-17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, 1}
    };
}

if (refSeq == 3) {
    /*
        # Divergence level: 0.5
        # Matrix from: TeraMonkey:Users:sergei:hyphy:trunk:TemplateBatchFiles:TemplateModels:EmpiricalAA:HIVBetween
        # Expected score: -0.849814 Entropy: 4.53171 bits
    */
    scoreMatrix = {
        /* A */ {7, -5, -4, -2, -7, -8, -2, -1, -8, -2, -6, -5, -4, -9, -1, -1, 2, -13, -12, 1, -3, -4, -2, -13}
        /* R */ {-5, 6, -2, -7, -6, 0, -4, 0, 1, -4, -4, 3, -1, -9, -3, 0, -1, -4, -6, -7, -4, -1, -1, -13}
        /* N */ {-4, -2, 7, 3, -6, -3, -3, -4, 0, -4, -8, 0, -7, -8, -5, 2, 0, -13, -2, -7, 6, -3, -1, -13}
        /* D */ {-2, -7, 3, 8, -10, -6, 2, -1, -1, -8, -12, -4, -10, -11, -9, -2, -4, -12, -4, -4, 6, 0, -2, -13}
        /* C */ {-7, -6, -6, -10, 11, -12, -12, -3, -4, -7, -6, -9, -10, 2, -8, 0, -4, 0, 1, -6, -7, -12, -3, -13}
        /* Q */ {-8, 0, -3, -6, -12, 7, -1, -7, 1, -8, -2, 0, -6, -8, 0, -6, -5, -11, -6, -10, -5, 5, -2, -13}
        /* E */ {-2, -4, -3, 2, -12, -1, 7, 0, -6, -8, -11, 0, -7, -13, -9, -7, -5, -12, -9, -3, 0, 5, -2, -13}
        /* G */ {-1, 0, -4, -1, -3, -7, 0, 7, -7, -8, -10, -3, -9, -7, -8, 0, -4, -3, -10, -4, -2, -1, -3, -13}
        /* H */ {-8, 1, 0, -1, -4, 1, -6, -7, 9, -7, -2, -2, -7, -3, -1, -3, -4, -8, 3, -10, 0, -1, -1, -13}
        /* I */ {-2, -4, -4, -8, -7, -8, -8, -8, -7, 6, 0, -5, 2, 0, -7, -3, 0, -11, -7, 3, -5, -8, -1, -13}
        /* L */ {-6, -4, -8, -12, -6, -2, -11, -10, -2, 0, 6, -7, 0, 1, -1, -4, -5, -4, -5, -1, -10, -5, -3, -13}
        /* K */ {-5, 3, 0, -4, -9, 0, 0, -3, -2, -5, -7, 6, -3, -11, -6, -2, 0, -9, -9, -6, 0, 0, -1, -13}
        /* M */ {-4, -1, -7, -10, -10, -6, -7, -9, -7, 2, 0, -3, 10, -4, -7, -6, 0, -9, -10, 1, -8, -6, -1, -13}
        /* F */ {-9, -9, -8, -11, 2, -8, -13, -7, -3, 0, 1, -11, -4, 9, -7, -4, -7, -3, 3, -3, -9, -10, -2, -13}
        /* P */ {-1, -3, -5, -9, -8, 0, -9, -8, -1, -7, -1, -6, -7, -7, 8, 0, -1, -11, -8, -8, -7, -2, -3, -13}
        /* S */ {-1, 0, 2, -2, 0, -6, -7, 0, -3, -3, -4, -2, -6, -4, 0, 7, 1, -9, -4, -6, 0, -6, -1, -13}
        /* T */ {2, -1, 0, -4, -4, -5, -5, -4, -4, 0, -5, 0, 0, -7, -1, 1, 6, -12, -7, -1, 0, -5, -1, -13}
        /* W */ {-13, -4, -13, -12, 0, -11, -12, -3, -8, -11, -4, -9, -9, -3, -11, -9, -12, 10, -2, -12, -12, -11, -6, -13}
        /* Y */ {-12, -6, -2, -4, 1, -6, -9, -10, 3, -7, -5, -9, -10, 3, -8, -4, -7, -2, 9, -9, -3, -7, -3, -13}
        /* V */ {1, -7, -7, -4, -6, -10, -3, -4, -10, 3, -1, -6, 1, -3, -8, -6, -1, -12, -9, 6, -5, -5, -1, -13}
        /* B */ {-3, -4, 6, 6, -7, -5, 0, -2, 0, -5, -10, 0, -8, -9, -7, 0, 0, -12, -3, -5, 7, 0, -2, -13}
        /* Z */ {-4, -1, -3, 0, -12, 5, 5, -1, -1, -8, -5, 0, -6, -10, -2, -6, -5, -11, -7, -5, 0, 6, -3, -13}
        /* X */ {-2, -1, -1, -2, -3, -2, -2, -3, -1, -1, -3, -1, -1, -2, -3, -1, -1, -6, -3, -1, -2, -3, -2, -13}
        /* * */ {-13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, -13, 1}
    };
}

/* Keep rows/columns 0..19 only: the 20 residues of SEQ_ALIGN_CHARACTER_MAP
   (the B, Z, X and * entries of the 24x24 table are dropped). */
alignOptions ["SEQ_ALIGN_SCORE_MATRIX"] = scoreMatrix[{{0,0}}][{{19,19}}];

/* Gap costs and affine-gap flag. */
alignOptions ["SEQ_ALIGN_GAP_OPEN"]     = 40;
alignOptions ["SEQ_ALIGN_GAP_OPEN2"]    = 20;
alignOptions ["SEQ_ALIGN_GAP_EXTEND"]   = 10;
alignOptions ["SEQ_ALIGN_GAP_EXTEND2"]  = 5;
alignOptions ["SEQ_ALIGN_AFFINE"]       = 1;

/* Prompt 2: how to treat indels at the sequence ends (re-uses `refSeq`). */
ChoiceList (refSeq, "Prefix/Suffix Indels", 1, SKIP_NONE,
            "No penalty",     "Do not penalize prefix and suffix Indels",
            "Normal penalty", "Treat prefix and suffix indels as any other indels");

if (refSeq < 0) {
    return 0;
}

/* The source text wraps in the middle of the next statement: the key and value
   (["SEQ_ALIGN_NO_TP"] = 1-refSeq;) follow on the next line of the file. */
alignOptions 
["SEQ_ALIGN_NO_TP"] = 1-refSeq; 282 283if (_skipPredefsSeqAlignShared == 0) 284{ 285 predefSeqNames = {{"First in file", "Use the first sequence in the data file as a reference"} 286 {"Longest in file", "Use the longest sequence in the data file as the reference"} 287 /*0*/ {"HXB2_env", "Use HIV-1 HXB2 reference strain envelope sequence (K03455)"} 288 /*1*/ {"HXB2_nef", "Use HIV-1 HXB2 reference strain NEF sequence (K03455)"} 289 /*2*/ {"HXB2_gag", "Use HIV-1 HXB2 reference strain gag sequence (K03455)"} 290 /*3*/ {"HXB2_vpr", "Use HIV-1 HXB2 reference strain vpr sequence (K03455)"} 291 /*4*/ {"HXB2_vif", "Use HIV-1 HXB2 reference strain vif sequence (K03455)"} 292 /*5*/ {"HXB2_vpu", "Use HIV-1 HXB2 reference strain vpu sequence (K03455)"} 293 /*6*/ {"HXB2_pr", "Use HIV-1 HXB2 reference strain protease sequence (K03455)"} 294 /*7*/ {"HXB2_rt", "Use HIV-1 HXB2 reference strain reverse transcriptase sequence (K03455)"} 295 /*8*/ {"HXB2_int", "Use HIV-1 HXB2 reference strain integrase sequence (K03455)"} 296 /*9*/ {"HXB2_rev", "Use HIV-1 HXB2 reference strain rev (exons 1 and 2)sequence (K03455)"} 297 /*10*/ {"HXB2_tat", "Use HIV-1 HXB2 reference strain tat (exons 1 and 2) sequence (K03455)"} 298 /*11*/ {"HXB2_prrt", "Use HIV-1 HXB2 reference strain protease+rt sequence (K03455)"} 299 /*12*/ {"NL4_3prrt", "Use HIV-1 NL4-3 reference strain pr+rt sequence"} 300 /*13*/ {"HXB2_pol", "Use HIV-1 HXB2 reference strain pol (starting at pr) sequence (K03455)"} 301 }; 302 303 304 predefSeqNames2 = {{"No", "No reference coordinate sequences"} 305 /*0*/ {"HXB2_env", "Use HIV-1 HXB2 reference strain envelope sequence (K03455)"} 306 /*1*/ {"HXB2_nef", "Use HIV-1 HXB2 reference strain NEF sequence (K03455)"} 307 /*2*/ {"HXB2_gag", "Use HIV-1 HXB2 reference strain gag sequence (K03455)"} 308 /*3*/ {"HXB2_vpr", "Use HIV-1 HXB2 reference strain vpr sequence (K03455)"} 309 /*4*/ {"HXB2_vif", "Use HIV-1 HXB2 reference strain vif sequence (K03455)"} 310 /*5*/ {"HXB2_vpu", "Use HIV-1 HXB2 
reference strain vpu sequence (K03455)"} 311 /*6*/ {"HXB2_pr", "Use HIV-1 HXB2 reference strain protease sequence (K03455)"} 312 /*7*/ {"HXB2_rt", "Use HIV-1 HXB2 reference strain reverse transcriptase sequence (K03455)"} 313 /*8*/ {"HXB2_int", "Use HIV-1 HXB2 reference strain integrase sequence (K03455)"} 314 /*9*/ {"HXB2_rev", "Use HIV-1 HXB2 reference strain rev (exons 1 and 2)sequence (K03455)"} 315 /*10*/ {"HXB2_tat", "Use HIV-1 HXB2 reference strain tat (exons 1 and 2) sequence (K03455)"} 316 /*11*/ {"HXB2_prrt", "Use HIV-1 HXB2 reference strain protease+rt sequence (K03455)"} 317 /*12*/ {"NL4_3prrt", "Use HIV-1 NL4-3 reference strain pr+rt sequence"} 318 /*13*/ {"HXB2_pol", "Use HIV-1 HXB2 reference strain pol (starting at pr) sequence (K03455)"} 319 }; 320 321 RefSeqs = {}; 322 RefSeqs [0] = "ATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATCTGTAGTGCTACAGAAAAATTGTGGGTCACAGTCTATTATGGGGTACCTGTGTGGAAGGAAGCAACCACCACTCTATTTTGTGCATCAGATGCTAAAGCATATGATACAGAGGTACATAATGTTTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAGTAGTATTGGTAAATGTGACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGAACAGATGCATGAGGATATAATCAGTTTATGGGATCAAAGCCTAAAGCCATGTGTAAAATTAACCCCACTCTGTGTTAGTTTAAAGTGCACTGATTTGAAGAATGATACTAATACCAATAGTAGTAGCGGGAGAATGATAATGGAGAAAGGAGAGATAAAAAACTGCTCTTTCAATATCAGCACAAGCATAAGAGGTAAGGTGCAGAAAGAATATGCATTTTTTTATAAACTTGATATAATACCAATAGATAATGATACTACCAGCTATAAGTTGACAAGTTGTAACACCTCAGTCATTACACAGGCCTGTCCAAAGGTATCCTTTGAGCCAATTCCCATACATTATTGTGCCCCGGCTGGTTTTGCGATTCTAAAATGTAATAATAAGACGTTCAATGGAACAGGACCATGTACAAATGTCAGCACAGTACAATGTACACATGGAATTAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGATCTGTCAATTTCACGGACAATGCTAAAACCATAATAGTACAGCTGAACACATCTGTAGAAATTAATTGTACAAGACCCAACAACAATACAAGAAAAAGAATCCGTATCCAGAGAGGACCAGGGAGAGCATTTGTTACAATAGGAAAAATAGGAAATATGAGACAAGCACATTGTAACATTAGTAGAGCAAAATGGAATAACACTTTAAAACAGATAGCTAGCAAATTAAGAGAACAATTTGGAAATAATAAAACAATAATCTTTAAGCAATCCTCAGGAGGGGACCCAGAAATTGTAACGCACAGTTTTAATTGTGGAGGGGAATTTTTCTACTGTAATTCAACACAACTGTTTAATAGTACTTGGTTT
AATAGTACTTGGAGTACTGAAGGGTCAAATAACACTGAAGGAAGTGACACAATCACCCTCCCATGCAGAATAAAACAAATTATAAACATGTGGCAGAAAGTAGGAAAAGCAATGTATGCCCCTCCCATCAGTGGACAAATTAGATGTTCATCAAATATTACAGGGCTGCTATTAACAAGAGATGGTGGTAATAGCAACAATGAGTCCGAGATCTTCAGACCTGGAGGAGGAGATATGAGGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTAAAAATTGAACCATTAGGAGTAGCACCCACCAAGGCAAAGAGAAGAGTGGTGCAGAGAGAAAAAAGAGCAGTGGGAATAGGAGCTTTGTTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCAGCCTCAATGACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAGCAGCAGAACAATTTGCTGAGGGCTATTGAGGCGCAACAGCATCTGTTGCAACTCACAGTCTGGGGCATCAAGCAGCTCCAGGCAAGAATCCTGGCTGTGGAAAGATACCTAAAGGATCAACAGCTCCTGGGGATTTGGGGTTGCTCTGGAAAACTCATTTGCACCACTGCTGTGCCTTGGAATGCTAGTTGGAGTAATAAATCTCTGGAACAGATTTGGAATCACACGACCTGGATGGAGTGGGACAGAGAAATTAACAATTACACAAGCTTAATACACTCCTTAATTGAAGAATCGCAAAACCAGCAAGAAAAGAATGAACAAGAATTATTGGAATTAGATAAATGGGCAAGTTTGTGGAATTGGTTTAACATAACAAATTGGCTGTGGTATATAAAATTATTCATAATGATAGTAGGAGGCTTGGTAGGTTTAAGAATAGTTTTTGCTGTACTTTCTATAGTGAATAGAGTTAGGCAGGGATATTCACCATTATCGTTTCAGACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTGGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGAATAGTGCTGTTAGCTTGCTCAATGCCACAGCCATAGCAGTAGCTGAGGGGACAGATAGGGTTATAGAAGTAGTACAAGGAGCTTGTAGAGCTATTCGCCACATACCTAGAAGAATAAGACAGGGCTTGGAAAGGATTTTGCTA"; 323 RefSeqs [1] = 
"ATGGGTGGCAAGTGGTCAAAAAGTAGTGTGATTGGATGGCCTACTGTAAGGGAAAGAATGAGACGAGCTGAGCCAGCAGCAGATAGGGTGGGAGCAGCATCTCGAGACCTGGAAAAACATGGAGCAATCACAAGTAGCAATACAGCAGCTACCAATGCTGCTTGTGCCTGGCTAGAAGCACAAGAGGAGGAGGAGGTGGGTTTTCCAGTCACACCTCAGGTACCTTTAAGACCAATGACTTACAAGGCAGCTGTAGATCTTAGCCACTTTTTAAAAGAAAAGGGGGGACTGGAAGGGCTAATTCACTCCCAAAGAAGACAAGATATCCTTGATCTGTGGATCTACCACACACAAGGCTACTTCCCTGAT---CAGAACTACACACCAGGGCCAGGGGTCAGATATCCACTGACCTTTGGATGGTGCTACAAGCTAGTACCAGTTGAGCCAGATAAGATAGAAGAGGCCAATAAAGGAGAGAACACCAGCTTGTTACACCCTGTGAGCCTGCATGGGATGGATGACCCGGAGAGAGAAGTGTTAGAGTGGAGGTTTGACAGCCGCCTAGCATTTCATCACGTGGCCCGAGAGCTGCATCCGGAGTACTTCAAGAACTGC"; 324 RefSeqs [2] = "ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAGCAAAACAAAAGTAAGAAAAAAGCACAGCAAGCAGCAGCTGACACAGGACACAGCAATCAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCAAATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAGTAGTAGAAGAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACACAGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATGAGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATTGCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAGGAACAAATAGGATGGATGACAAATAATCCACCTATCCCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAGACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAACTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAAACCTTGTTGGTCCAAAATGCGAACCCAGATTGTAAGACTATTTTAAAAGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGGGAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATGATGCAGAGAGGCAATTTTAGGAACCAAAGAAAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGGACACCAAATGAAAGATTGTACTGAGAGACAGGCTAATTTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTTCTTCAGAGCAGACCA
GAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAA"; 325 RefSeqs [3] = "ATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACACTAGAGCTTTTAGAGGAGCTTAAGAATGAAGCTGTTAGACATTTTCCTAGGATTTGGCTCCATGGCTTAGGGCAACATATCTATGAAACTTATGGGGATACTTGGGCAGGAGTGGAAGCCATAATAAGAATTCTGCAACAACTGCTGTTTATCCATTTTCAGAATTGGGTGTCGACA---CAGAATAGGCGTTACTCGACAGAGGAGAGCAAGAAATGGAGCCAG---ATC"; 326 RefSeqs [4] = "ATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAGAACATGGAAAAGTTTAGTAAAACACCATATGTATGTTTCAGGGAAAGCTAGGGGATGGTTTTATAGACATCACTATGAAAGCCCTCATCCAAGAATAAGTTCAGAAGTACACATCCCACTAGGGGATGCTAGATTGGTAATAACAACATATTGGGGTCTGCATACAGGAGAAAGAGACTGGCATTTGGGTCAGGGAGTCTCCATAGAATGGAGGAAAAAGAGATATAGCACACAAGTAGACCCTGAACTAGCAGACCAACTAATTCATCTGTATTACTTTGACTGTTTTTCAGACTCTGCTATAAGAAAGGCCTTATTAGGACACATAGTTAGCCCTAGGTGTGAATATCAAGCAGGACATAACAAGGTAGGATCTCTACAATACTTGGCACTAGCAGCATTAATAACACCAAAAAAGATAAAGCCACCTTTGCCTAGTGTTACGAAACTGACAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGGGCCACAGAGGGAGCCACACAATGAATGGACAC"; 327 RefSeqs [5] = "ACGCAACCTATACCAATAGTAGCAATAGTAGCATTAGTAGTAGCAATAATAATAGCAATAGTTGTGTGGTCCATAGTAATCATAGAATATAGGAAAATATTAAGACAAAGAAAAATAGACAGGTTAATTGATAGACTAATAGAAAGAGCAGAAGACAGTGGCAATGAGAGTGAAGGAGAAATATCAGCACTTGTGGAGATGGGGGTGGAGATGGGGCACCATGCTCCTTGGGATGTTGATGATC"; 328 RefSeqs [6] = "CCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTT"; 329 RefSeqs [7] = 
"CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTC"; 330 RefSeqs [8] = 
"TTTTTAGATGGAATAGATAAGGCCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGAT"; 331 RefSeqs [9] = "ATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTAACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGATTAGTGAACGGATCCTTGGCACTTATCTGGGACGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTACTCTTGATTGTAACGAGGATTGTGGAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATTGGTGGAATCTCCTACAGTATTGGAGTCAGGAACTAAAGA"; 332 RefSeqs[10] = "ATGGAGCCAGTAGATCCTAGACTAGAGCCCTGGAAGCATCCAGGAAGTCAGCCTAAAACTGCTTGTACCAATTGCTATTGTAAAAAGTGTTGCTTTCATTGCCAAGTTTGTTTCATAACAAAAGCCTTAGGCATCTCCTATGGCAGGAAGAAGCGGAGACAGCGACGAAGAGCTCATCAGAACAGTCAGACTCATCAAGCTTCTCTATCAAAGCAACCCACCTCCCAACCCCGAGGGGACCCGACAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGAGAGACAGAGACAGATCCATTCGAT"; 333 RefSeqs[11] = 
"CCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTC"; 334 RefSeqs[12] = 
"CCTCAGATCACTCTTTGGCAGCGACCCCTCGTCACAATAAAGATAGGGGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAGGACAGTATGATCAGATACTCATAGAAATCTGCGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGCTGCACTTTAAATTTTCCCATTAGTCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGATTTCTGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAACAGAAAAAATCAGTAACAGTACTGGATGTGGGCGATGCATATTTTTCAGTTCCCTTAGATAAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAGTGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTCATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAACTGAGACAACATCTGTTGAGGTGGGGATTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAGGACAGCTGGACTGTCAATGACATACAGAAATTAGTGGGAAAATTGAATTGGGCAAGTCAGATTTATGCAGGGATTAAAGTAAGGCAATTATGTAAACTTCTTAGGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAACCGGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAAGGGTGCCCACACTAATGATGTGAAACAATTAACAGAGGCAGTACAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAATTACCCATACAAAAGGAAACATGGGAAGCATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTCAATACCCCTCCCTTAGTGAAGTTATGGTACCAGTTAGAGAAAGAACCCATAATAGGAGCAGAAACTTTC"; 335 RefSeqs[13] = 
"CCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAAAGATAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGAGTTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAGTATGATCAGATACTCATAGAAATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCTACACCTGTCAACATAATTGGAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTTCCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCATTAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATTGGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAGAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGTGTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGCAAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAAACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACAATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGGGAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACATGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGTTAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCATAGTAGGAGCAGAAACCTTCTATGTAGATGGGGCAGCTAACAGGGAGACTAAATTAGGAAAAGCAGGATATGTTACTAATAGAGGAAGACAAAAAGTTGTCACCCTAACTGACACAACAAATCAGAAGACTGAGTTACAAGCAATTTATCTAGCTTTGCAGGATTCGGGATTAGAAGTAAACATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACAACCAGATCAAAGTGAATCAGAGTTAGTCAATCAAATAATAGAGCAGTTAATAAAAAAGGAAAAGGTCTATCTGGCATGGGTACCAGCACACAAAGGAATTGGAGGAAATGAACAAGTAGATAAATTAGTCAGTGCTGGAATCAGGAAAGTACTATTTTTAGATGGAATAGATAAGG
CCCAAGATGAACATGAGAAATATCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAACCTGCCACCTGTAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAATGTCAGCTAAAAGGAGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAACTAGATTGTACACATTTAGAAGGAAAAGTTATCCTGGTAGCAGTTCATGTAGCCAGTGGATATATAGAAGCAGAAGTTATTCCAGCAGAAACAGGGCAGGAAACAGCATATTTTCTTTTAAAATTAGCAGGAAGATGGCCAGTAAAAACAATACATACTGACAATGGCAGCAATTTCACCGGTGCTACGGTTAGGGCCGCCTGTTGGTGGGCGGGAATCAAGCAGGAATTTGGAATTCCCTACAATCCCCAAAGTCAAGGAGTAGTAGAATCTATGAATAAAGAATTAAAGAAAATTATAGGACAGGTAAGAGATCAGGCTGAACATCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAGTAGACATAATAGCAACAGACATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGACAGCAGAAATCCACTTTGGAAAGGACCAGCAAAGCTCCTCTGGAAAGGTGAAGGGGCAGTAGTAATACAAGATAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAAGTAGACAGGATGAGGATTAG";

    ChoiceList (refSeq,"Choose a reference sequence",1,SKIP_NONE,predefSeqNames);
    if (refSeq < 0)
    {
        return 0;
    }
}

//---------------------------------------------------------------------------------------------------------------------

/*
    _handleAlignment

    Codon-aware alignment of every sequence in a data set to the first
    ("reference") sequence, followed by merging of the pairwise alignments
    into a single multiple alignment.

    Parameters
        dataSetName  : string, the name of the DataSet to align; it is spliced
                       into a CreateFilter command, so it must be an identifier
                       that exists when this function is called.
        writeToFiles : if non-zero, prompt for an output file and write the
                       protein alignment to it, the nucleotide alignment to
                       <file>.nuc and skipped sequences to <file>.bad;
                       otherwise results are only returned.

    Returns
        An associative array. When writeToFiles is zero it maps sequence name
        to gapped nucleotide sequence; when writeToFiles is non-zero it is
        left empty.

    Globals read (defined outside this function)
        doLongestSequence, doRC, codonToAAMap, alignOptions, skipOutliers,
        refSeq2, plus the helpers translateCodonToAA,
        nucleotideReverseComplement, avlToMatrix and checkFramePosition.

    NOTE: this is a plain `function`, so every variable assigned below lives
    in the global namespace.
*/
function _handleAlignment (dataSetName, writeToFiles)
{
    result = {};

    ExecuteCommands ("DataSetFilter _alignmentHandlerFilter = CreateFilter ("+dataSetName+",1);");
    GetInformation  (UnalignedSeqs,_alignmentHandlerFilter);

    /* ---- preprocess sequences ---- */

    unalSequenceCount = Rows(UnalignedSeqs)*Columns(UnalignedSeqs);

    /* FIX: names are read from the filter built from dataSetName; the original
       read them from a hard-coded global data set called `unal`, which only
       works when the caller happens to pass that very data set. */
    GetString (sequenceNames, _alignmentHandlerFilter, -1);

    longestSequence    = 0;
    longestSequenceIDX = 0;

    for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
    {
        /* drop every non-letter, then leading and trailing runs of N */
        UnalignedSeqs[seqCounter] = ((UnalignedSeqs[seqCounter]^{{"[^a-zA-Z]",""}})^{{"^N+",""}})^{{"N+$",""}};

        if (doLongestSequence)
        {
            /* any value of doLongestSequence other than 1 excludes the last
               sequence from the search for the longest one */
            if (doLongestSequence == 1 || seqCounter != unalSequenceCount-1)
            {
                if (Abs (UnalignedSeqs[seqCounter]) > longestSequence)
                {
                    longestSequence    = Abs (UnalignedSeqs[seqCounter]);
                    longestSequenceIDX = seqCounter;
                }
            }
        }
    }

    if (doLongestSequence)
    {
        fprintf (stdout, "\nSelected sequence ", sequenceNames[longestSequenceIDX], " as reference.");
        if (longestSequenceIDX > 0)
        {
            /* move the chosen reference to slot 0 (swap sequence and name) */
            p1                                = UnalignedSeqs[0];
            UnalignedSeqs[0]                  = UnalignedSeqs[longestSequenceIDX];
            UnalignedSeqs[longestSequenceIDX] = p1;
            p1                                = sequenceNames[0];
            sequenceNames[0]                  = sequenceNames[longestSequenceIDX];
            sequenceNames[longestSequenceIDX] = p1;
        }
    }

    /* ---- determine reading frames ---- */

    ProteinSequences = {};
    AllTranslations  = {};
    ReadingFrames    = {};
    StopCodons       = {};
    StopPositions    = {};
    didRC            = {};

    fprintf (stdout, "\nDetecting reading frames for each sequence...\n");
    frameCounter  = {3,1};
    stillHasStops = {};

    for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter += 1)
    {
        aSeq     = UnalignedSeqs[seqCounter];
        seqLen   = Abs(aSeq)-2;

        minStops = 1e20;
        tString  = "";
        rFrame   = 0;

        for (rc = 0; rc <= doRC; rc += 1)
        {
            stopPosn      = {3,2};   /* per frame: first and last "X" position */
            allTran       = {3,1};   /* per frame: full translation            */
            strandHasBest = 0;       /* did this strand yield the best frame?  */

            if (rc)
            {
                /* FIX: the terminating semicolon was missing here */
                aSeq = nucleotideReverseComplement (aSeq);
            }

            for (offset = 0; offset < 3; offset += 1)
            {
                translString = translateCodonToAA (aSeq, codonToAAMap, offset);
                stopPos      = translString||"X";   /* "X" marks a stop in the translation */
                if (stopPos[0]>=0)
                {
                    stopCount           = Rows(stopPos)$2;
                    stopPosn[offset][0] = stopPos[0];
                    stopPosn[offset][1] = stopPos[stopCount*2-1];
                }
                else
                {
                    stopCount = 0;
                }
                if (stopCount<minStops)
                {
                    if (rc)
                    {
                        UnalignedSeqs[seqCounter] = aSeq;
                        didRC[seqCounter]         = 1;
                    }
                    minStops      = stopCount;
                    rFrame        = offset;
                    tString       = translString;
                    strandHasBest = 1;
                }
                allTran[offset] = translString;
            }

            /* FIX: keep the stop table / translations of the strand that owns
               the best frame. The original always stored the tables of the
               LAST strand scanned, so with doRC enabled and the forward strand
               winning, the frameshift repair below mixed reverse-complement
               translations with the forward nucleotide sequence. */
            if (strandHasBest)
            {
                bestStopPosn = stopPosn;
                bestAllTran  = allTran;
            }
        }

        ReadingFrames    [seqCounter] = rFrame;
        ProteinSequences [seqCounter] = tString;
        frameCounter     [rFrame]     = frameCounter[rFrame]+1;
        StopPositions    [seqCounter] = bestStopPosn;
        AllTranslations  [seqCounter] = bestAllTran;

        if (minStops>0)
        {
            stillHasStops[Abs(stillHasStops)] = seqCounter;
            assert (seqCounter > 0, "Reference sequence must not contain frameshifts\n");
        }
    }

    s1 = ProteinSequences[0];

    fprintf (stdout, "\nFound:\n\t", frameCounter[0],
                     " sequences in reading frame 1\n\t",frameCounter[1],
                     " sequences in reading frame 2\n\t",frameCounter[2],
                     " sequences in reading frame 3\n\n",
                     "There were ", Abs(stillHasStops), " sequences with apparent frameshift/sequencing errors\n");

    /* ---- try to repair sequences with a single frameshift ---- */

    skipSeqs = {};

    for (k=0; k<Abs(stillHasStops); k += 1)
    {
        seqCounter = stillHasStops[k];
        seqName    = sequenceNames[seqCounter];
        fprintf (stdout,"Sequence ", seqCounter+1, " (", seqName, ") seems to have");
        stopPosn   = StopPositions[seqCounter];

        /* checkFramePosition communicates through these globals */
        fStart = -1;
        fEnd   = -1;
        fMin   = 1e10;
        frame1 = 0;
        frame2 = 0;

        checkFramePosition (stopPosn[0][1],stopPosn[1][0],0,1);
        checkFramePosition (stopPosn[1][1],stopPosn[0][0],1,0);
        checkFramePosition (stopPosn[0][1],stopPosn[2][0],0,2);
        checkFramePosition (stopPosn[2][1],stopPosn[0][0],2,0);
        checkFramePosition (stopPosn[2][1],stopPosn[1][0],2,1);
        checkFramePosition (stopPosn[1][1],stopPosn[2][0],1,2);

        if (fStart>=0)
        {
            allTran = AllTranslations[seqCounter];
            useq    = UnalignedSeqs[seqCounter];
            fprintf (stdout, " a shift from frame ", frame2+1, " to frame ", frame1+1, " between a.a. positions ", fStart, " and ", fEnd, ".");
            fStart2    = Max(fStart-1,0);
            fEnd2      = Min(fEnd+1,Min(Abs(allTran[frame1]),Abs(allTran[frame2]))-1);
            tempString = allTran[frame2];
            fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame2+1, ":\n\t", tempString[fStart2][fEnd2]);
            fprintf (stdout, "\n\t", useq[3*fStart2+frame2][3*fEnd2+frame2-1]);
            tempString = allTran[frame1];
            fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame1+1, ":\n\t", tempString[fStart2][fEnd2]);
            fprintf (stdout, "\n\t", useq[3*fStart2+frame1][3*fEnd2+frame1-1]);
            fprintf (stdout, "\n\t\tAttempting to resolve by alignment to reference. ");

            f1s = allTran[frame1];
            f2s = allTran[frame2];
            f1l = Abs(f1s);

            bestScore  = -1e10;
            bestSplice = -1;

            /* try every splice point in the window; keep the one whose spliced
               protein aligns best to the reference protein */
            for (k2=fStart; k2<fEnd; k2=k2+1)
            {
                s2    = f2s[0][k2]+f1s[k2+1][Abs(f1s)];
                inStr = {{s1,s2}};
                AlignSequences(aligned, inStr, alignOptions);
                aligned = aligned[0];
                aligned = aligned[0];   /* alignment score */
                if (aligned > bestScore)
                {
                    bestScore  = aligned;
                    bestSplice = k2;
                    bestString = s2;
                }
            }
            fprintf (stdout, "Best splice site appears to be at a.a. position ", bestSplice, "\n");

            /* update best spliced string */
            ProteinSequences[seqCounter] = bestString;
            ReadingFrames[seqCounter]    = 0;

            UnalignedSeqs[seqCounter]    = useq[frame2][frame2+3*bestSplice+2] + useq[frame1+3*bestSplice+3][Abs(useq)-1] + "---";
        }
        else
        {
            fprintf (stdout, " multiple frameshifts\n");
            skipSeqs[seqCounter] = 1;
        }
    }

    /* ---- pairwise protein alignment of every sequence to the reference ---- */

    SeqAlignments    = {};
    startingPosition = {unalSequenceCount,2};
    refLength        = Abs(ProteinSequences[0]);
    refInsertions    = {refLength+1,1};   /* longest insertion seen before each reference position */

    fprintf (stdout,"\nPerforming pairwise alignment with reference sequences\n");

    alignmentScores = {};

    for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter += 1)
    {
        if (skipSeqs[seqCounter] == 0)
        {
            s2    = ProteinSequences[seqCounter];
            inStr = {{s1,s2}};
            AlignSequences(aligned, inStr, alignOptions);
            aligned                   = aligned[0];
            SeqAlignments[seqCounter] = aligned;
            /* `+` appends to the associative array: score per aligned column */
            alignmentScores + aligned[0]/Abs(aligned[1]);
            aligned             = aligned[1];       /* gapped reference */
            myStartingPosition  = aligned$"[^-]";
            myEndingPosition    = Abs (aligned)-1;

            while (aligned[myEndingPosition]=="-")
            {
                myEndingPosition = myEndingPosition - 1;
            }

            myStartingPosition              = myStartingPosition[0];
            startingPosition[seqCounter][0] = myStartingPosition;
            startingPosition[seqCounter][1] = myEndingPosition;
            aligned                         = aligned[myStartingPosition][myEndingPosition];

            /* record gap runs inside the reference (i.e. insertions in the query) */
            refInsert = aligned||"-+";
            if (refInsert[0]>0)
            {
                insCount = Rows (refInsert)/2;
                offset   = 0;
                for (insN = 0; insN < insCount; insN = insN+1)
                {
                    insPos    = refInsert[insN*2];
                    insLength = refInsert[insN*2+1]-insPos+1;
                    insPos    = insPos-offset;   /* convert to ungapped reference coordinate */
                    if (refInsertions[insPos]<insLength)
                    {
                        refInsertions[insPos]=insLength;
                    }
                    offset = offset + insLength;
                }
            }
        }
    }

    alignmentScoresM = avlToMatrix ("alignmentScores");
    LoadFunctionLibrary ("DescriptiveStatistics.bf");
    distInfo         = GatherDescriptiveStats (alignmentScoresM);

    /* FIX: the original evaluated this expression and discarded it, leaving
       lowerCuttoff (read in the outlier test below) unassigned. */
    lowerCuttoff     = distInfo["Mean"] - 2*distInfo["Std.Dev"];

    /* ---- merge pairwise alignments into one multiple alignment ---- */

    fprintf (stdout,"\nMerging pairwise alignments into a MSA\n");

    s1N        = UnalignedSeqs[0];
    frameShift = ReadingFrames[0];

    fullRefSeq = "";
    fullRefSeq * refLength;
    fullRefSeq * (s1[0]);

    fullRefSeqN = "";
    fullRefSeqN * (3*refLength);
    /* FIX: honour the reference reading-frame offset for the first codon too;
       the original used s1N[0][2] here while every later codon was offset by
       frameShift. */
    fullRefSeqN * (s1N[frameShift][frameShift+2]);

    for (seqCounter=1;seqCounter<refLength;seqCounter=seqCounter+1)
    {
        gapCount = refInsertions[seqCounter];
        for (k=0; k<gapCount;k=k+1)
        {
            fullRefSeq*("-");
            fullRefSeqN*("---");
        }
        fullRefSeq  * (s1[seqCounter]);
        fullRefSeqN * (s1N[frameShift+seqCounter*3][frameShift+seqCounter*3+2]);
    }

    fullRefSeq  * 0;
    fullRefSeqN * 0;

    refLength = Abs(fullRefSeq);

    seqName = sequenceNames[0];
    if (writeToFiles)
    {
        SetDialogPrompt ("Save alignment to:");
        fprintf (PROMPT_FOR_FILE,CLEAR_FILE,">",seqName,"\n",fullRefSeq);
        fName  = LAST_FILE_PATH;
        fNameC = fName+".nuc";
        fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",fullRefSeqN);
    }
    else
    {
        result [seqName] = fullRefSeqN;
    }

    alCounter = 0;   /* index into alignmentScoresM; advances only for aligned sequences */

    for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter += 1)
    {
        if (skipSeqs[seqCounter] == 0)
        {
            if (skipOutliers == 0 && alignmentScoresM[alCounter] < lowerCuttoff)
            {
                seqName=sequenceNames[seqCounter];
                fprintf (stdout, "Sequence ", seqName ," was skipped because of a poor alignment score.\n");
                skipSeqs[seqCounter] = 1;
                alCounter = alCounter + 1;
                continue;
            }
            alCounter = alCounter + 1;
            seqName   = sequenceNames[seqCounter];
            aligned   = SeqAlignments[seqCounter];

            aligned1  = aligned[1];   /* gapped reference */
            aligned2  = aligned[2];   /* gapped query     */

            s2 = startingPosition[seqCounter][0];
            e2 = startingPosition[seqCounter][1];

            gappedSeq = "";
            gappedSeq * Abs(aligned2);

            /* walk the merged reference; pad the query with "-" wherever the
               merged reference has a column this pairwise alignment lacks */
            k = 0;
            while (k<refLength)
            {
                while (fullRefSeq[k]!=aligned1[s2])
                {
                    gappedSeq*("-");
                    k=k+1;
                }
                gappedSeq*(aligned2[s2]);
                s2=s2+1;
                k=k+1;
            }

            gappedSeq * 0;

            gappedSeqN = "";
            gappedSeqN * (3*Abs(aligned2));

            frameShift = ReadingFrames[seqCounter];

            s1N = UnalignedSeqs[seqCounter];
            s2N = ProteinSequences[seqCounter];
            s2  = startingPosition[seqCounter][0];
            e2  = Abs(gappedSeq);
            k   = 0;

            /* thread codons back onto the gapped protein string */
            while (k<e2)
            {
                while ((s2N[s2]!=gappedSeq[k])&&(k<e2))
                {
                    gappedSeqN * ("---");
                    k=k+1;
                }
                if (k<e2)
                {
                    gappedSeqN * s1N[frameShift+s2*3][frameShift+s2*3+2];
                    s2 = s2+1;
                    k=k+1;
                }
            }
            gappedSeqN * 0;

            if (writeToFiles)
            {
                if (refSeq2 && seqCounter == unalSequenceCount-1)
                {
                    /* last sequence is written at the top of both files:
                       read what is there, then rewrite with it prepended */
                    fscanf  (fName, "Raw", soFar);
                    fprintf (fName, CLEAR_FILE,">",seqName,"\n",gappedSeq,"\n",soFar);
                    fscanf  (fNameC, "Raw", soFar);
                    fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",gappedSeqN,"\n",soFar);
                }
                else
                {
                    fprintf (fName,"\n>",seqName,"\n",gappedSeq);
                    fprintf (fNameC,"\n>",seqName,"\n",gappedSeqN);
                }
            }
            else
            {
                result [seqName] = gappedSeqN;
            }
        }
    }

    /* ---- dump skipped sequences ---- */

    if (Abs(skipSeqs) && writeToFiles)
    {
        fName = fName+".bad";
        for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
        {
            if (skipSeqs[seqCounter])
            {
                seqName=sequenceNames[seqCounter];
                fprintf (fName,">",seqName,"\n",UnalignedSeqs[seqCounter],"\n");
            }
        }
    }

    return result;
}

//---------------------------------------------------------------------------------------------------------------------

/*
    checkFramePosition

    Candidate test for a single frameshift: pos1 is passed the LAST stop
    position of frame fr1 and pos2 the FIRST stop position of frame fr2 (see
    the six calls in _handleAlignment). If pos2 lies more than one position
    after pos1 and the gap is the smallest seen so far, the candidate is
    recorded.

    Communicates through globals: reads and updates fMin; on a new minimum
    sets frame1 = fr1, frame2 = fr2, fStart = pos1+1, fEnd = pos2.
    The caller must reset fMin / fStart / fEnd before a series of calls.

    Always returns 0.
*/
function checkFramePosition (pos1, pos2, fr1, fr2)
{
    fSpan = pos2-pos1;

    if (fSpan>1) // first followed by second
    {
        if (fSpan < fMin)
        {
            fMin   = fSpan;
            frame1 = fr1;
            frame2 = fr2;
            fStart = pos1+1;
            fEnd   = pos2;
        }
    }
    return 0;
}
