1ChoiceList (testType,"Test for",1,SKIP_NONE,"Mean branch length","Compare mean branch lengths between two or more non-nested clades.", 2 "Mean pairwise divergence","Compare mean within-clade pairwise divergence between two or more non-nested clades."); 3 4if (testType < 0) 5{ 6 return 0; 7} 8 9if (testType) 10{ 11 echoString = "mean within-clade divergence"; 12} 13else 14{ 15 echoString = "mean within-clade branch length (or component branch length)"; 16} 17ChoiceList (dataType,"Data type",1,SKIP_NONE,"Nucleotide/Protein","Nucleotide or amino-acid (protein).", 18 "Codon","Codon (several available genetic codes)."); 19 20if (dataType<0) 21{ 22 return; 23} 24 25if (dataType) 26{ 27 NICETY_LEVEL = 3; 28 SetDialogPrompt ("Please choose a codon data file:"); 29 incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"; 30 ExecuteCommands ("#include \""+incFileName+"\";"); 31} 32else 33{ 34 SetDialogPrompt ("Please choose a nucleotide or amino-acid data file:"); 35} 36 37DataSet ds = ReadDataFile (PROMPT_FOR_FILE); 38 39if (dataType) 40{ 41 DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); 42} 43else 44{ 45 DataSetFilter filteredData = CreateFilter (ds,1); 46} 47 48fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds); 49 50 51_DO_TREE_REBALANCE_ = 0; 52 53SelectTemplateModel(filteredData); 54 55if (Rows("LAST_MODEL_PARAMETER_LIST")>1) 56{ 57 ChoiceList (parameter2Constrain, "Parameter(s) to constrain:",1,SKIP_NONE,LAST_MODEL_PARAMETER_LIST); 58 59 if (parameter2Constrain<0) 60 { 61 return 0; 62 } 63 if (parameter2Constrain==0) 64 { 65 fprintf (stdout, "ERROR: Multiple parameter constraints are not supported by this analysis; sorry!\n"); 66 return 0; 67 } 68} 69else 70{ 71 parameter2Constrain = 1; 72} 73 74GetString (funnyString,LAST_MODEL_PARAMETER_LIST,parameter2Constrain-1); 75 76_DO_TREE_REBALANCE_ = 0; 77incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf"; 78ExecuteCommands ("#include \""+incFileName+"\";"); 79 80treeAVL = givenTree ^ 1; /* pre-order traversal */ 81 82if (testType) 83{ 84 treeAVL2 = givenTree ^ 0; /* post-order traversal */ 85 86 multFactors = {}; 87 for (k=1; k<Abs(treeAVL2); k=k+1) 88 { 89 aNode = treeAVL2[k]; 90 aNodeName = aNode["Name"]; 91 parentIndex = aNode["Parent"]; 92 k2 = Abs(aNode["Children"]); 93 if (k2) 94 { 95 currentDepth = aNode["Below"]; 96 multFactors[aNodeName] = currentDepth; 97 if (parentIndex > 0) 98 { 99 pInfo = treeAVL2[parentIndex]; 100 pInfo ["Below"] = pInfo ["Below"] + currentDepth; 101 treeAVL2[parentIndex] = pInfo; 102 } 103 } 104 else 105 { 106 multFactors[aNodeName] = 1; 107 pInfo = treeAVL2[parentIndex]; 108 pInfo ["Below"] = pInfo ["Below"] + 1; 109 treeAVL2[parentIndex] = pInfo; 110 } 111 112 } 113 treeAVL2 = 0; 114} 115 116branchNameList = BranchName (givenTree,-1); 117mapToBL = {}; 118 119for (k=0; k<Columns(branchNameList); k=k+1) 120{ 121 aNode = branchNameList[k]; 122 mapToBL [aNode] = k; 123} 124 125cladeNodeLists = {}; 126cladeIDLists = {}; 127cladeSumVar = {}; 128nameToAVLIdx = {}; 129cladeLeafCount = {}; 130 131for (k=1; k<Abs(treeAVL); k=k+1) 132{ 133 aNode = treeAVL[k]; 134 aNodeName = aNode["Name"]; /*upcase*/ 135 nameToAVLIdx [aNodeName] = k; 136 match = (aNodeName&&1)$"^CLADE[0-9]+$"; 137 if (match[0]>=0 && Abs (aNode["Children"])>0) 138 /* valid clade */ 139 { 140 currentDepth = aNode["Depth"]; 141 nodeList = {}; 142 143 leafCount = 0; 144 145 for (k2=k+1; k2<Abs(treeAVL); k2=k2+1) 146 { 147 aNode2 = treeAVL[k2]; 148 if (aNode2["Depth"] <= currentDepth) 149 { 150 break; 151 } 152 if (Abs(aNode2["Children"]) == 0) 153 { 154 leafCount = leafCount + 1; 155 } 156 minCladeIndex = aNode2["Name"]; 157 nodeList[Abs(nodeList)] = minCladeIndex; 158 nameToAVLIdx [minCladeIndex] = k2; 159 } 160 161 k = k2-1; 162 cladesFound = Abs(nodeList); 163 cladeNodeLists[aNodeName] = nodeList; 164 indexMatrix = {cladesFound,1}; 165 for (k2 = 0; k2 < cladesFound; k2=k2+1) 166 { 167 aNode2 = nodeList[k2]; 168 indexMatrix [k2] = mapToBL[aNode2]; 169 } 170 171 cladeIDLists[aNodeName] = indexMatrix; 172 cladeLeafCount[aNodeName] = leafCount; 173 } 174} 175 176 177cladesFound = Abs (cladeNodeLists); 178if (cladesFound < 2) 179{ 180 fprintf (stdout, "ERROR: Couldn't find 2 or more (non-nested) clades with roots labeled by CladeN, where N is a number. Found ", cladesFound, " labeled clades\n"); 181 return 0; 182} 183 184cladeRoots = Rows (cladeNodeLists); 185 186minCladeSize = 1e100; 187minCladeIndex = 0; 188 189mFactors = {}; 190bmFactors = {}; 191 192fprintf (stdout, "\n\nFound ", cladesFound, " labeled clades\n"); 193for (k=0; k<cladesFound; k=k+1) 194{ 195 cladeName = cladeRoots[k]; 196 cladeSize = Abs(cladeNodeLists[cladeName]); 197 leafCount = Abs(cladeLeafCount[cladeName]); 198 199 fprintf (stdout, "Clade rooted at ", cladeName, " with ", cladeSize , " branches and ", leafCount, " leaves.\n"); 200 nodeList = cladeNodeLists[cladeName]; 201 202 if (testType) 203 { 204 if (minCladeSize > leafCount) 205 { 206 minCladeSize = leafCount; 207 minCladeIndex = k; 208 } 209 } 210 else 211 { 212 if (minCladeSize > cladeSize) 213 { 214 minCladeSize = cladeSize; 215 minCladeIndex = k; 216 } 217 } 218 219 bmFactor = {Columns(branchNameList),1}; 220 cString = ""; 221 cString * 256; 222 for (k2 = 0; k2<Abs(nodeList); k2=k2+1) 223 { 224 aNodeName = nodeList[k2]; 225 aNode = nameToAVLIdx[aNodeName]; 226 aNode = treeAVL[aNode]; 227 if (testType) 228 { 229 cCount = Abs(aNode["Children"]); 230 if (cCount) 231 { 232 cCount = multFactors[aNodeName]; 233 mFactors[aNodeName] = (leafCount-cCount)*cCount 234 } 235 else 236 { 237 mFactors[aNodeName] = leafCount-1; 238 } 239 } 240 else 241 { 242 mFactors[aNodeName] = 1; 243 } 244 k3 = mapToBL[aNodeName]; 245 bmFactor [k3] = mFactors[aNodeName]; 246 if (k2) 247 { 248 cString * "+"; 249 } 250 cString * ("TREE_PLACE_HOLDER."+aNodeName + "." + funnyString + "*" + mFactors[aNodeName]); 251 } 252 cString * 0; 253 cladeSumVar[cladeName] = cString; 254 bmFactors[cladeName] = bmFactor; 255 256} 257 258 259fprintf (stdout, "1). Running an unconstrained fit\n"); 260 261Tree aTree = treeString; 262 263USE_LAST_RESULTS = 0; 264 265LikelihoodFunction lf = (filteredData,aTree); 266 267Optimize (res_free,lf); 268fprintf (stdout, lf, "\n"); 269 270blVector = BranchLength (aTree,-1); 271ReportMeans (blVector); 272 273USE_LAST_RESULTS = 1; 274 275fprintf (stdout, "2). Running a completely constrained fit\n"); 276 277Tree mirrorTree = treeString; 278 279 280ReplicateConstraint ("this1.?.?:=this2.?.?",mirrorTree,aTree); 281 282ClearConstraints (mirrorTree); 283 284for (k=0; k<cladesFound; k=k+1) 285{ 286 if (k==minCladeIndex) 287 { 288 smallCladeName = cladeRoots[minCladeIndex]; 289 cladeConstraint = cladeSumVar[smallCladeName] ^ {{"TREE_PLACE_HOLDER","aTree"}}; 290 ExecuteCommands ("global SUM_"+smallCladeName+":="+cladeConstraint+";"); 291 } 292 else 293 { 294 cladeName = cladeRoots[k]; 295 cladeConstraint = cladeSumVar[cladeName] ^ {{"TREE_PLACE_HOLDER","mirrorTree"}}; 296 ExecuteCommands ("global SUM_"+cladeName+":="+cladeConstraint+";"); 297 } 298} 299 300 301for (k=0; k<cladesFound; k=k+1) 302{ 303 if (k!=minCladeIndex) 304 { 305 cladeName = cladeRoots[k]; 306 if (testType) 307 { 308 cladeSize = cladeLeafCount[cladeName]; 309 ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize*(cladeSize-1)/minCladeSize/(minCladeSize-1))+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");"); 310 } 311 else 312 { 313 cladeSize = Abs(cladeNodeLists[cladeName]); 314 ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize/minCladeSize)+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");"); 315 } 316 ExecuteCommands ("aTree."+cladeName+"."+funnyString+"=0.1;"); 317 } 318} 319 320LikelihoodFunction lf = (filteredData,aTree); 321Optimize (res_contsr,lf); 322 323fprintf (stdout, "\n\nLRT for difference in mean divergences\n", 324 "\nLR = ", 2*(res_free[1][0]-res_contsr[1][0]), 325 "\np-value = ", 1-CChi2(2*(res_free[1][0]-res_contsr[1][0]),cladesFound-1),"\n"); 326 327fprintf (stdout, lf, "\n"); 328 329blVector = BranchLength (aTree,-1); 330ReportMeans (blVector); 331 332comparisonIndex = 3; 333 334ClearConstraints (aTree); 335 336 337if (cladesFound > 2) 338{ 339 for (c1 = 0; c1<cladesFound-1; c1 = c1+1) 340 { 341 smallCladeName = cladeRoots[c1]; 342 if (testType) 343 { 344 minCladeSize = cladeLeafCount[smallCladeName]; 345 } 346 else 347 { 348 minCladeSize = Abs(cladeNodeLists[smallCladeName]); 349 } 350 351 cladeConstraint = cladeSumVar[smallCladeName] ^ {{"TREE_PLACE_HOLDER","aTree"}}; 352 ExecuteCommands ("global SUM_"+smallCladeName+":="+cladeConstraint+";"); 353 354 for (c2 = c1+1; c2 < cladesFound; c2=c2+1) 355 { 356 cladeName = cladeRoots[c2]; 357 358 cladeConstraint = cladeSumVar[cladeName] ^ {{"TREE_PLACE_HOLDER","mirrorTree"}}; 359 ExecuteCommands ("global SUM_"+cladeName+":="+cladeConstraint+";"); 360 361 if (testType) 362 { 363 cladeSize = cladeLeafCount[cladeName]; 364 ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize*(cladeSize-1)/minCladeSize/(minCladeSize-1))+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");"); 365 } 366 else 367 { 368 cladeSize = Abs(cladeNodeLists[cladeName]); 369 ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize/minCladeSize)+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");"); 370 } 371 372 ExecuteCommands ("aTree."+cladeName+".t=0.1;"); 373 fprintf (stdout, comparisonIndex, "). Running a constrained fit on clades ", smallCladeName, " and ", cladeName, "\n"); 374 375 LikelihoodFunction lf = (filteredData,aTree); 376 Optimize (res_2c,lf); 377 378fprintf (stdout, "\n\nLRT for difference in mean divergences\n", 379 "\nLR = ", 2*(res_free[1][0]-res_2c[1][0]), 380 "\np-value = ", 1-CChi2(2*(res_free[1][0]-res_2c[1][0]),1),"\n"); 381 382 fprintf (stdout, lf, "\n"); 383 384 blVector = BranchLength (aTree,-1); 385 ReportMeans (blVector); 386 comparisonIndex = comparisonIndex + 1; 387 388 ClearConstraints (aTree); 389 } 390 391 } 392} 393 394USE_LAST_RESULTS = 0; 395 396/*--------------------------------------------------------------------------------------------------------------*/ 397 398function ReportMeans (branches) 399{ 400 for (k=0; k<cladesFound; k=k+1) 401 { 402 cladeName = cladeRoots[k]; 403 sum = branches*bmFactors[cladeName]; 404 405 if (testType) 406 { 407 leafCount = cladeLeafCount[cladeName]; 408 sum = sum[0]*2/leafCount/(leafCount-1); 409 } 410 else 411 { 412 cladeSize = Abs(cladeNodeLists[cladeName]); 413 sum = sum[0]/cladeSize; 414 } 415 416 fprintf (stdout, "Clade rooted at ", cladeName, " has ", echoString, " of ", sum, "\n"); 417 418 } 419 return 0; 420} 421