/*
    Partition-incongruence analysis (HyPhy batch language).

    Given an alignment and a set of breakpoints, this file fits a GTR model with
    adaptively-discretized rate variation to the full alignment and to each
    partition, and quantifies phylogenetic incongruence between partitions via
    c-AIC differences, tree-split comparisons, and KH-style bootstrap tests.
    Requires NJ.bf (included below) for InferTreeTopology().
*/

VERBOSITY_LEVEL = -1;   /* suppress HyPhy's optimization progress output */
incFileName = HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "Utility" + DIRECTORY_SEPARATOR + "NJ.bf";
ExecuteCommands ("#include \""+incFileName + "\";");

/* ________________________________________________________________________________________________*/


/*
    processAPartition (dataFilePath, partitionMatrix)

    dataFilePath    : path to a sequence file; if non-empty, it is read into the
                      data set 'ds' (otherwise a previously loaded 'ds' is reused).
    partitionMatrix : 1xN matrix of breakpoint coordinates; partition k spans
                      [previous breakpoint + 1 .. partitionMatrix[k]], and a final
                      partition runs to the last site (so N breakpoints yield N+1
                      partitions).

    Returns an AVL with keys:
      "VS"         - number of variable sites in the alignment
      "AC".."GT"   - MLEs of the GTR exchangeability parameters (single-tree fit)
      "alpha"      - MLE of the gamma shape parameter
      "Divergence" - mean pairwise divergence (percent) under the single tree
      "DAIC"       - c-AIC(single tree) - c-AIC(per-partition trees)
      "DLogL"      - log-likelihood gain of the multi-tree fit
      and, when DAIC >= 0, additionally:
      "SIG_CPL"    - count of adjacent partition pairs with reciprocally
                     significant KH tests (Bonferroni-corrected)
      "SPLITS"     - mean fraction of shared splits between partition trees
      "Fragments"  - 2x(N+1) matrix: row 0 = sites per partition,
                     row 1 = corrected KH p-values vs. the preceding partition
*/
function processAPartition (dataFilePath, partitionMatrix)
{
    if (Abs (dataFilePath))
    {
        DataSet ds = ReadDataFile (dataFilePath);
    }
    readPCount = Columns (partitionMatrix);
    /*fprintf (stdout, "\nLoaded ", readPCount , " partitions\n");*/

    outAVL = {};

    /* build site-range strings for each partition and infer an NJ topology per partition */
    partitionStrings = {};
    treeStrings      = {};

    lastP = 0;
    for (h=0; h<readPCount; h=h+1)
    {
        pString = ""+lastP+"-"+partitionMatrix[h];
        lastP   = partitionMatrix[h]+1;
        partitionStrings [h] = pString;
        DataSetFilter filteredData = CreateFilter (ds,1,pString);
        treeStrings[h] = InferTreeTopology(filteredData);
    }

    /* trailing partition: from the last breakpoint to the final site (h == readPCount here) */
    pString = ""+lastP+"-"+(ds.sites-1);
    partitionStrings [h] = pString;
    DataSetFilter filteredData = CreateFilter (ds,1,pString);
    treeStrings[h] = InferTreeTopology(filteredData);

    resp = 4;   /* number of site-rate categories */

    /* beta distribution whose discretization supplies the category weights below */
    global betaP = 1;
    global betaQ = 1;
    betaP:>0.05;betaP:<85;
    betaQ:>0.05;betaQ:<85;

    category pc = (resp-1, EQUAL, MEAN,
                    _x_^(betaP-1)*(1-_x_)^(betaQ-1)/Beta(betaP,betaQ), /* density */
                    IBeta(_x_,betaP,betaQ), /*CDF*/
                    0,                 /*left bound*/
                    1,                 /*right bound*/
                    IBeta(_x_,betaP+1,betaQ)*betaP/(betaP+betaQ)
                  );

    /* gamma-distributed rate multiplier 'c'; 'pc' supplies the (estimated) category
       weights, i.e. the beta-gamma adaptive rate-variation scheme */
    global alpha = .5;
    alpha:>0.01;alpha:<100;
    category c = (resp, pc, MEAN,
                    GammaDist(_x_,alpha,alpha),
                    CGammaDist(_x_,alpha,alpha),
                    0 ,
                    1e25,
                    CGammaDist(_x_,alpha+1,alpha)
                 );

    /* GTR exchangeabilities, relative to AG (which is fixed at 1) */
    global AC = 1;
    global AT = 1;
    global CG = 1;
    global CT = 1;
    global GT = 1;

    /* 't' is the branch-length parameter; 'c' the rate-category multiplier defined above */
    GTR_Matrix = {{*,AC*t*c,t*c,AT*t*c}
                  {AC*t*c,*,CG*t*c,CT*t*c}
                  {t*c,CG*t*c,*,GT*t*c}
                  {AT*t*c,CT*t*c,GT*t*c,*}};

    DataSetFilter filteredData = CreateFilter (ds,1);

    /* count variable sites: a site is variable when more than one nucleotide
       frequency is non-zero (mpiNode here is just a 0..3 base index) */
    bppMap = {};
    for (h=0; h<filteredData.sites; h=h+1)
    {
        filterString = "" + h;
        DataSetFilter siteFilter = CreateFilter (filteredData,1,filterString);

        HarvestFrequencies (f1, siteFilter, 1, 1, 0);
        m1 = 0;
        for (mpiNode=0; (mpiNode < 4) && (m1<=1) ; mpiNode=mpiNode+1)
        {
            if (f1[mpiNode]>0)
            {
                m1=m1+1;
            }
        }
        if (m1>1)
        {
            bppMap[Abs(bppMap)] = h;
        }
    }

    /*fprintf (stdout, "\nSequences :", filteredData.species,
                     "\nSites     :", filteredData.sites,
                     "\nVariable  :", Abs(bppMap), "\n"); */

    outAVL ["VS"] = Abs(bppMap);

    HarvestFrequencies (baseFreqs,ds,1,1,1);

    /*fprintf (stdout, "\n\nf(A) = ", baseFreqs[0],
                     "\nf(C) = ", baseFreqs[1],
                     "\nf(G) = ", baseFreqs[2],
                     "\nf(T) = ", baseFreqs[3],"\n");*/

    Model GTR_Model = (GTR_Matrix, baseFreqs, 1);

    /* baseline fit: one NJ tree for the entire alignment */
    treeString = InferTreeTopology (0);
    Tree givenTree = treeString;

    LikelihoodFunction singlePart = (filteredData, givenTree);
    Optimize (res, singlePart);

    baseParams = res[1][2];   /* number of global (shared) parameters estimated */

    if (baseParams>0)
    {
        /* freeze every global parameter at its MLE ('x := x__' substitutes the
           current numeric value) so the multi-tree fit below re-estimates only
           branch lengths */
        ConstraintString = "";
        ConstraintString*256;
        for (h=0; h<baseParams; h=h+1)
        {
            GetString (v,singlePart,h);
            ConstraintString * (v+":="+v+"__;\n");
        }
        ConstraintString*0;
        ExecuteCommands (ConstraintString);
    }

    /* small-sample corrected AIC of the single-tree model
       NOTE(review): '-2(x)' relies on HBL treating juxtaposition as multiplication — confirm */
    nullCAIC = -2(res[1][0]-res[1][1]*filteredData.sites/(filteredData.sites-res[1][1]-1));

    /*fprintf (stdout, "\n\nc-AIC = ", nullCAIC, "\n", singlePart);*/

    outAVL["AC"] = AC;
    outAVL["AT"] = AT;
    outAVL["CG"] = CG;
    outAVL["CT"] = CT;
    outAVL["GT"] = GT;
    outAVL["alpha"] = alpha;

    m1 = computeMeanDivergence ("givenTree");

    /*fprintf (stdout, "\nMean divergence : ", m1*100, "%\n");*/
    outAVL ["Divergence"] = m1*100;

    USE_DISTANCES = 0;

    if (readPCount == 0)
    {
        return outAVL;
    }

    /* alternative fit: one filter + one tree per partition, sharing the frozen
       global parameters; the likelihood function is assembled as a string */
    lfDef = "";
    lfDef * 128;
    lfDef * "LikelihoodFunction multiPart  = (";

    for (pccounter = 0; pccounter <= readPCount; pccounter = pccounter + 1)
    {
        ExecuteCommands ("DataSetFilter part_" + pccounter + " = CreateFilter (ds, 1, \"" + partitionStrings[pccounter] + "\");");
        ExecuteCommands ("Tree tree_" + pccounter + " = " + treeStrings[pccounter] + ";");
        if (pccounter)
        {
            lfDef * ",";
        }
        lfDef * ("part_"+pccounter+",tree_"+pccounter);
    }
    lfDef * ");";
    lfDef * 0;
    ExecuteCommands (lfDef);
    Optimize (res2, multiPart);

    /*fprintf (stdout, multiPart,"\n",res,"\n", res2);*/

    /* total parameter count = frozen globals + per-partition branch lengths */
    myDF = baseParams + res2[1][1];

    fullCAIC = -2(res2[1][0]-myDF*filteredData.sites/(filteredData.sites-myDF-1));
    /*fprintf (stdout, "\n\nc-AIC = ", fullCAIC, "\nDelta AIC = ", nullCAIC-fullCAIC,"\n\n");*/
    outAVL ["DAIC"]  = nullCAIC-fullCAIC;
    outAVL ["DLogL"] = res2[1][0]-res[1][0];

    /* no c-AIC support for multiple trees: skip the expensive KH testing */
    if (outAVL ["DAIC"] < 0)
    {
        return outAVL;
    }

    fragMatrix = {2,readPCount+1};

    for (pccounter = 0; pccounter <= readPCount; pccounter = pccounter + 1)
    {
        ExecuteCommands ("siteCount = part_" + pccounter + ".sites;");
        /*fprintf (stdout, "Partition ", pccounter+1, " : ", siteCount, " sites\n");*/
        fragMatrix [0][pccounter] = siteCount;
    }

    /* fraction of shared splits for every ordered pair (upper triangle only) */
    pairWiseSplitMatch = {readPCount+1,readPCount+1};

    for (pccounter = 0; pccounter <= readPCount; pccounter = pccounter + 1)
    {
        for (pc2 = pccounter+1; pc2 <= readPCount; pc2 = pc2 + 1)
        {
            ExecuteCommands ("siteCount = compareTreesForSplits(\"tree_" +pccounter + "\",\"tree_"+pc2+"\");");
            pairWiseSplitMatch[pccounter][pc2] = siteCount[2]/Min(siteCount[0],siteCount[1]);
        }
    }

    /* now do SH testing */

    conditionals     = {};
    likelihoodScores = {readPCount+1,readPCount+1};
    pairwiseP        = {readPCount+1,readPCount+1};

    treeSplitMatches = 0;
    khIterations     = 1000;

    for (pccounter = 0; pccounter <= readPCount; pccounter = pccounter + 1)
    {
        /* fit partition pccounter's data under its own tree and each neighbor's tree,
           caching per-site log-likelihoods for the resampling test below */
        for (pc2 = 0; pc2 <= readPCount; pc2 = pc2 + 1)
        {
            if (Abs(pc2-pccounter) <= 1)
            {
                DataSetFilter      aPart = CreateFilter (ds,1,partitionStrings[pccounter]);
                Tree               aTree = treeStrings[pc2];
                LikelihoodFunction aLF   = (aPart,aTree);

                /*fprintf (stdout, "\n\nFitting tree ", pc2+1, " to partition ", pccounter+1, "\n");*/
                Optimize (aRes, aLF);
                /*fprintf (stdout, aLF);*/
                GetInformation (cI,c);
                cI = cI[1][-1];                   /* row 1 = category weights */
                ConstructCategoryMatrix (conds, aLF);
                conds = Log(cI*conds);           /* per-site log-likelihoods, rates integrated out */
                conditionals [""+pccounter+","+pc2] = conds;
                likelihoodScores[pccounter][pc2] = aRes[1][0];
                treeSplitMatches = treeSplitMatches + pairWiseSplitMatch[pccounter][pc2];
            }
        }

        /*fprintf (stdout, "\nKH Testing partition ", pccounter+1, "\n");*/
        partTreeConds = conditionals[""+pccounter+","+pccounter];

        /* KH bootstrap: p = fraction of resampled LRTs below zero (floored at 1/khIterations) */
        for (pc2 = 0; pc2 <= readPCount; pc2 = pc2 + 1)
        {
            if (Abs(pc2-pccounter) == 1)
            {
                otherPartTree = conditionals[""+pccounter+","+pc2];
                baseLRT = 2*(likelihoodScores[pccounter][pccounter]-likelihoodScores[pccounter][pc2]);
                /*fprintf (stdout, "Tree ", pc2+1, " base LRT = ", baseLRT, ". p-value = ");*/
                textMx = testLRT(partTreeConds,otherPartTree,khIterations) % 0;   /* '% 0' sorts ascending */
                for (kk=0; kk<khIterations; kk=kk+1)
                {
                    if (textMx[kk]>=0)
                    {
                        break;
                    }
                }
                pval = Max(1,kk)/khIterations;
                /*
                fprintf (stdout, pval , "\n");
                */

                pairwiseP[pccounter][pc2] = pval;
            }
        }
    }

    /* NOTE(review): only the readPCount adjacent (upper-triangle) entries are ever
       non-zero, yet the divisor is the count of ALL pairs — confirm intended */
    treeSplitMatches = treeSplitMatches*2/readPCount/(readPCount+1);
    totalComparisons = readPCount*2;     /* two reciprocal tests per adjacent pair */
    threshP          = 0.01/totalComparisons;   /* Bonferroni-corrected threshold */

    significantCouplings = 0;

    /* a coupling is significant only when BOTH reciprocal KH tests pass the threshold */
    for (pccounter = 1; pccounter <= readPCount; pccounter = pccounter + 1)
    {
        if (pairwiseP[pccounter][pccounter-1] <= threshP && pairwiseP[pccounter-1][pccounter] <= threshP)
        {
            significantCouplings = significantCouplings + 1;
        }
        fragMatrix [1][pccounter] = totalComparisons*Max (pairwiseP[pccounter][pccounter-1],pairwiseP[pccounter-1][pccounter]);
    }

    /*fprintf (stdout, "A total of ", significantCouplings, "/", readPCount, " significant couplings\n");
    fprintf (stdout, "Mean splits identify: ",treeSplitMatches, "\n" );*/
    outAVL["SIG_CPL"]   = significantCouplings;
    outAVL["SPLITS"]    = treeSplitMatches;
    outAVL["Fragments"] = fragMatrix;

    return outAVL;
}

/* ________________________________________________________________________________________________*/

/*
    computeMeanDivergence (treeID)

    Mean pairwise (patristic) distance over all leaf pairs of tree 'treeID'
    (passed by name). Each branch contributes length * (#leaves below it) *
    (#leaves above it) — the number of leaf pairs whose path crosses it — and the
    total is divided by leafCount-choose-2.
*/
function computeMeanDivergence (treeID)
{
    /* '^0' yields the tree as a post-order AVL */
    ExecuteCommands ("treeAVL2="+treeID+"^0;leafCount=TipCount("+treeID+")");
    multFactors = {};

    /* post-order pass: count leaves below each node, propagating counts upward */
    for (k=1; k<Abs(treeAVL2); k=k+1)
    {
        aNode       = treeAVL2[k];
        aNodeName   = aNode["Name"];
        parentIndex = aNode["Parent"];
        k2          = Abs(aNode["Children"]);
        if (k2)
        {
            currentDepth = aNode["Below"];
            multFactors[aNodeName] = currentDepth;
            if (parentIndex > 0)
            {
                pInfo = treeAVL2[parentIndex];
                pInfo ["Below"] = pInfo ["Below"] + currentDepth;
                treeAVL2[parentIndex] = pInfo;
            }
        }
        else
        {
            multFactors[aNodeName] = 1;
            pInfo = treeAVL2[parentIndex];
            pInfo ["Below"] = pInfo ["Below"] + 1;
            treeAVL2[parentIndex] = pInfo;
        }
    }

    pKeys = Rows(multFactors);

    /* leaves-below * leaves-above = leaf pairs separated by the branch above each node */
    for (k=0; k<Columns(pKeys); k=k+1)
    {
        aNodeName = pKeys[k];
        multFactors[aNodeName] = multFactors[aNodeName] * (leafCount-multFactors[aNodeName]);
    }

    ExecuteCommands ("bNames = BranchName ("+treeID+",-1);");
    ExecuteCommands ("bLen   = BranchLength ("+treeID+",-1);");

    sum = 0;
    bc  = Columns(bNames)-1;   /* skip the last entry (the root has no branch) */

    for (k=0; k<bc; k=k+1)
    {
        aNodeName = bNames[k];
        sum = sum + bLen[k]*multFactors[aNodeName];
    }
    return sum/(leafCount*(leafCount-1)*0.5);
}

/* ________________________________________________________________________________________________*/

/*
    compareTreesForSplits (tName, tName2)

    Compares the leaf bipartitions (splits) induced by internal branches of two
    trees (passed by name; they must share the same leaf set — getSplits errors
    on an unknown leaf). Returns {{#splits in tree 1, #splits in tree 2,
    #matching splits}}.
*/
function compareTreesForSplits (tName, tName2)
{
    /* obtain an AVL data structure of the tree, post-order layout */

    ExecuteCommands ("_astavl_="+tName+"^1;");
    _tree_size_ = Abs (_astavl_);


    /* assign each leaf of tree 1 a 1-based index (0 is reserved as "not found") */
    nodeMapAVL = {};

    for (_a_node = 2; _a_node < _tree_size_; _a_node = _a_node + 1)
    {
        _node_info = _astavl_[_a_node];
        myDegree = Abs(_node_info["Children"]);

        if (myDegree == 0)
        {
            nodeName = _node_info["Name"];
            nodeMapAVL [nodeName] = Abs(nodeMapAVL)+1;
        }
    }

    split1 = getSplits(0);
    ExecuteCommands ("_astavl_="+tName2+"^1;");
    _tree_size_ = Abs (_astavl_);
    split2 = getSplits(0);

    s1c = Abs(split1);
    s2c = Abs(split2);

    /* greedy one-to-one matching; compareSplits returns non-zero for a match
       under either labeling of the two sides */
    matches = {};
    match1  = {};
    match2  = {};
    for (r1 = 0; r1 < s1c; r1 = r1 + 1)
    {
        if (match1[r1] == 0)
        {
            for (r2 = 0; r2 < s2c; r2 = r2 + 1)
            {
                if (match2[r2] == 0)
                {
                    splitCheck = compareSplits (split1[r1],split2[r2]);
                    if (splitCheck)
                    {
                        mr = {{r1,r2}};
                        matches [Abs(matches)] = mr;
                        match1[r1] = 1;
                        match2[r2] = 1;
                    }
                }
            }
        }
    }


    return {{s1c,s2c,Abs(matches)}};
}

/* ________________________________________________________________________________________________*/

/*
    getSplits (dummy)

    Collects the leaf bipartitions of the tree currently held in the file-level
    variables _astavl_ (pre-order AVL) and nodeMapAVL (leaf name -> 1-based
    index). For every internal node with positive branch length, emits a 0/1
    column vector flagging the leaves in its subtree. Returns an AVL of these
    vectors, or 0 if a leaf is missing from nodeMapAVL.
*/
function getSplits (dummy)
{
    treeSplits = {};
    for (_a_node = 2; _a_node < _tree_size_; _a_node = _a_node + 1)
    {
        _node_info = _astavl_[_a_node];
        myDegree = Abs(_node_info["Children"]);
        myDepth  = _node_info["Depth"];


        if (myDegree && _node_info["Length"]>0)
        {
            nodeName = _node_info["Name"];
            aSplit = {Abs(nodeMapAVL),1};
            /* in pre-order, the subtree is the run of following nodes with greater depth */
            for (_b_node = _a_node + 1; _b_node < _tree_size_; _b_node = _b_node + 1)
            {
                _bnode_info = _astavl_[_b_node];
                if (_bnode_info["Depth"] <= myDepth)
                {
                    break;
                }
                if (Abs(_bnode_info["Children"])==0)
                {
                    _bnode_info = _bnode_info["Name"];
                    _bnode_info = nodeMapAVL[_bnode_info];
                    if (_bnode_info == 0)
                    {
                        fprintf (stdout, "Error: extraneous node name\n");
                        return 0;
                    }
                    aSplit [_bnode_info-1] = 1;
                }
            }
            treeSplits [Abs(treeSplits)] = aSplit;
        }
    }
    return treeSplits;
}

/* ________________________________________________________________________________________________*/

/*
    compareSplits (s1, s2)

    Compares two 0/1 split vectors of equal length. Returns 1 if identical,
    -1 if exact complements (the same bipartition labeled from the other side),
    0 otherwise.
*/
function compareSplits (s1, s2)
{
    sl            = Rows(s1);
    positiveCheck = (s1[0] == s2[0]);   /* decide the labeling from the first entry */
    for (k=1; k<sl; k=k+1)
    {
        if (s1[k] != s2[k])
        {
            if (positiveCheck)
            {
                return 0;
            }
        }
        else
        {
            if (!positiveCheck)
            {
                return 0;
            }
        }
    }
    return -1+2*positiveCheck;
}

/*--------------------------------------------------------------------------------------*/

/*
    testLRT (vec1, vec2, itCount)

    KH-style bootstrap of the likelihood-ratio statistic. vec1/vec2 are 1xN
    per-site log-likelihood vectors under two competing trees. Sites are
    resampled jointly itCount times; returns an itCount x 1 matrix of
    2*(sum(resampled vec1) - sum(resampled vec2)).
*/
function testLRT (vec1, vec2, itCount)
{
    size1 = Columns(vec1);

    sumVec1 = {size1,1};
    jvec    = {2,size1};
    resMx1  = {itCount,1};
    resMx2  = {itCount,1};

    /* stack the two vectors so sites are resampled jointly; sumVec1 is all ones */
    for (k=0; k<size1; k=k+1)
    {
        sumVec1 [k] = 1;
        jvec    [0][k] = vec1[k];
        jvec    [1][k] = vec2[k];
    }


    for (k=0; k<itCount; k=k+1)
    {
        /* NOTE(review): Random(matrix,1) is taken to resample columns with
           replacement (bootstrap) — confirm option semantics */
        resampled = Random(jvec,1);
        resampled = resampled*sumVec1;   /* row sums: total logL per tree */
        resMx1[k] = resampled[0];
        resMx2[k] = resampled[1];
    }

    return (resMx1-resMx2)*2;
}