1 /* massXpert - the true massist's program. 2 -------------------------------------- 3 Copyright(C) 2006,2007 Filippo Rusconi 4 5 http://www.massxpert.org/massXpert 6 7 This file is part of the massXpert project. 8 9 The massxpert project is the successor to the "GNU polyxmass" 10 project that is an official GNU project package(see 11 www.gnu.org). The massXpert project is not endorsed by the GNU 12 project, although it is released ---in its entirety--- under the 13 GNU General Public License. A huge part of the code in massXpert 14 is actually a C++ rewrite of code in GNU polyxmass. As such 15 massXpert was started at the Centre National de la Recherche 16 Scientifique(FRANCE), that granted me the formal authorization to 17 publish it under this Free Software License. 18 19 This software is free software; you can redistribute it and/or 20 modify it under the terms of the GNU General Public 21 License version 3, as published by the Free Software Foundation. 22 23 24 This software is distributed in the hope that it will be useful, 25 but WITHOUT ANY WARRANTY; without even the implied warranty of 26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27 General Public License for more details. 28 29 You should have received a copy of the GNU General Public License 30 along with this software; if not, write to the 31 32 Free Software Foundation, Inc., 33 34 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 35 */ 36 37 38 /////////////////////// Local includes 39 #include "cleaver.hpp" 40 41 42 namespace massXpert 43 { 44 Cleaver(Polymer * polymer,const PolChemDef * polChemDef,const CleaveOptions & cleaveOptions,const CalcOptions & calcOptions,const IonizeRule & ionizeRule)45 Cleaver::Cleaver(Polymer *polymer, 46 const PolChemDef *polChemDef, 47 const CleaveOptions &cleaveOptions, 48 const CalcOptions &calcOptions, 49 const IonizeRule &ionizeRule) 50 : mp_polymer(polymer), 51 mp_polChemDef(polChemDef), 52 m_cleaveOptions(cleaveOptions), 53 m_calcOptions(calcOptions), 54 m_ionizeRule(ionizeRule) 55 { 56 Q_ASSERT(mp_polymer && mp_polChemDef); 57 mp_oligomerList = 0; 58 } 59 60 Cleaver(const Cleaver & other)61 Cleaver::Cleaver(const Cleaver &other) 62 : mp_polymer(other.mp_polymer), 63 mp_polChemDef(other.mp_polChemDef), 64 m_cleaveOptions(other.m_cleaveOptions), 65 m_calcOptions(other.m_calcOptions), 66 m_ionizeRule(other.m_ionizeRule) 67 { 68 Q_ASSERT(mp_polymer && mp_polChemDef); 69 mp_oligomerList = 0; 70 } 71 72 ~Cleaver()73 Cleaver::~Cleaver() 74 { 75 // We are not owner of the oligomer list, do not free it! 76 } 77 78 79 void setOligomerList(OligomerList * oligomerList)80 Cleaver::setOligomerList(OligomerList *oligomerList) 81 { 82 Q_ASSERT(oligomerList); 83 84 mp_oligomerList = oligomerList; 85 } 86 87 88 OligomerList * oligomerList()89 Cleaver::oligomerList() 90 { 91 return mp_oligomerList; 92 } 93 94 95 bool cleave(bool reset)96 Cleaver::cleave(bool reset) 97 { 98 if (!mp_oligomerList) 99 qFatal("%s@%d -- The oligomer list is 0.", __FILE__, __LINE__); 100 101 // If the polymer sequence is empty, just return. 102 if (!mp_polymer->size()) 103 return true; 104 105 // Ensure that the cleavage pattern was already parsed. 106 107 if (!m_cleaveOptions.motifList()->size()) 108 { 109 if (!m_cleaveOptions.parse()) 110 { 111 qDebug() << __FILE__ << __LINE__ 112 << "Failed to parse the cleavage options"; 113 114 return false; 115 } 116 } 117 118 // qDebug() << __FILE__ << __LINE__ 119 // << "number of motifs:" 120 // << mp_cleaveOptions->motifList()->size(); 121 122 if (!fillIndexLists()) 123 { 124 qDebug() << __FILE__ << __LINE__ 125 << "Index lists(cleave/nocleave) are empty." 126 "No oligomer generated."; 127 128 // We can return true, as no error condition was found but not 129 // oligomers were generated. 130 131 return true; 132 } 133 134 if (resolveCleavageNoCleavage() == -1) 135 { 136 qDebug() << __FILE__ << __LINE__ 137 << "Failed to resolve cleavage/nocleavage"; 138 139 return false; 140 } 141 142 removeDuplicatesCleavage(); 143 144 qSort(m_cleaveIndexList.begin(), m_cleaveIndexList.end()); 145 146 // for (int debugIter = 0; debugIter < m_cleaveIndexList.size(); 147 // ++debugIter) 148 // { 149 // qDebug() << __FILE__ << __LINE__ 150 // << "Index:" << m_cleaveIndexList.at(debugIter); 151 // } 152 153 if (reset) 154 emptyOligomerList(); 155 156 for (int iter = 0 ; iter <= m_cleaveOptions.partials(); ++iter) 157 { 158 if (cleavePartial(iter) == -1) 159 { 160 qDebug() << __FILE__ << __LINE__ 161 << "Failed to perform partial cleavage at index:" 162 << iter; 163 164 return false; 165 } 166 } 167 168 // At this point we have the list of lists of oligomers, one list of 169 // oligomers for each partial cleavage. 170 171 while (m_cleaveIndexList.size()) 172 m_cleaveIndexList.removeFirst(); 173 174 while (m_noCleaveIndexList.size()) 175 m_noCleaveIndexList.removeFirst(); 176 177 return true; 178 } 179 180 181 int fillIndexLists()182 Cleaver::fillIndexLists() 183 { 184 QList<CleaveMotif *> *motifList = m_cleaveOptions.motifList(); 185 186 while (m_cleaveIndexList.size()) 187 m_cleaveIndexList.removeFirst(); 188 189 while (m_noCleaveIndexList.size()) 190 m_noCleaveIndexList.removeFirst(); 191 192 // Since version 2.3.0, the cleavage might be performed on a 193 // selected portion of a sequence only, not necessarily on the 194 // whole polymer sequence. 195 196 CoordinateList coordinateList = m_calcOptions.coordinateList(); 197 Coordinates *coordinates = coordinateList.first(); 198 199 int startIndex = coordinates->start(); 200 int endIndex = coordinates->end(); 201 202 for (int iter = 0; iter < motifList->size(); ++iter) 203 { 204 CleaveMotif *cleaveMotif = motifList->at(iter); 205 Q_ASSERT(cleaveMotif); 206 207 int index = startIndex -1; 208 209 while (1) 210 { 211 index = findCleaveMotif(*cleaveMotif, index + 1, endIndex); 212 213 if (index == -1) 214 break; 215 216 // Do not forget: The position at which the motif is found 217 // in the polymer sequence is not necessarily the position 218 // at which the cleavage will effectively occur. Indeed, 219 // let's say that we found such motif in the polymer 220 // sequence: "KKRKGP". This motif was extracted from a 221 // cleave spec that had a pattern like this: "KKRK/GP". What 222 // we see here is that the cleavage occurs after the fourth 223 // monomer! And we must realize that the 'index' returned 224 // above corresponds to the index of the first 'K' in 225 // "KKRKGP" motif that was found in the polymer 226 // sequence. Thus we have to take into account the offset 227 //(+4, in our example, WHICH IS A POSITION and not an 228 // index, which is why we need to remove 1 below) of the 229 // cleavage: 230 231 int cleavageIndex = index + cleaveMotif->offset() - 1; 232 233 if (cleavageIndex < 0) 234 continue; 235 236 if (cleavageIndex >= endIndex) 237 break; 238 239 // qDebug() << __FILE__ << __LINE__ 240 // << "Found new cleavage index:" 241 // << cleavageIndex; 242 243 if (cleaveMotif->isForCleave()) 244 { 245 m_cleaveIndexList.append(cleavageIndex); 246 247 // qDebug() << __FILE__ << __LINE__ 248 // << "For cleavage, index:" << cleavageIndex; 249 } 250 else 251 { 252 m_noCleaveIndexList.append(cleavageIndex); 253 254 // qDebug() << __FILE__ << __LINE__ 255 // << "Not for cleavage"; 256 } 257 258 } 259 // End of 260 // while (1) 261 } 262 // End of 263 // for (int iter = 0; iter < motifList->size(); ++iter) 264 265 // Note that returning 0 is not an error condition, because a 266 // sequence where no site is found whatsoever will result in 0. 267 return m_cleaveIndexList.size() + m_noCleaveIndexList.size(); 268 } 269 270 271 int resolveCleavageNoCleavage()272 Cleaver::resolveCleavageNoCleavage() 273 { 274 for (int iter = 0; iter < m_noCleaveIndexList.size(); ++iter) 275 { 276 int noCleaveIndex = m_noCleaveIndexList.at(iter); 277 278 for(int jter = 0; jter < m_cleaveIndexList.size(); ++jter) 279 { 280 int cleaveIndex = m_cleaveIndexList.at(jter); 281 282 if (noCleaveIndex == cleaveIndex) 283 m_cleaveIndexList.removeAt(jter); 284 } 285 } 286 287 return m_cleaveIndexList.size(); 288 } 289 290 291 int removeDuplicatesCleavage()292 Cleaver::removeDuplicatesCleavage() 293 { 294 for (int iter = 0; iter < m_cleaveIndexList.size(); ++iter) 295 { 296 int index = m_cleaveIndexList.at(iter); 297 298 int foundItemIndex = m_cleaveIndexList.indexOf(index, iter + 1); 299 300 if (foundItemIndex != -1) 301 { 302 m_cleaveIndexList.removeAt(foundItemIndex); 303 --iter; 304 } 305 } 306 307 return m_cleaveIndexList.size(); 308 } 309 310 311 int findCleaveMotif(CleaveMotif & cleaveMotif,int startIndex,int endIndex)312 Cleaver::findCleaveMotif(CleaveMotif &cleaveMotif, 313 int startIndex, int endIndex) 314 { 315 bool noGood = false; 316 317 int firstIndex = 0; 318 319 QList<const Monomer *> monomerList = mp_polymer->monomerList(); 320 const QStringList &codeList = cleaveMotif.codeList(); 321 322 323 //We have to iterate in the polymer sequence starting at 'index', in 324 //search for a sequence element identical to the sequence in the 325 //'cleaveMotif'. 326 327 // This means that if 328 329 // cleavemotif->m_motifList [0] = "Lys" 330 331 // cleavemotif->m_motifList [1] = "Pro" 332 333 // the, we want to search in the polymer list of monomers the same 334 // sequence by iterating in this list from index 'index' onwards, 335 // and we stop searching when the list's end is found or if 336 337 // list [n] = "Lys" and 338 339 // list [n+1] = "Pro". 340 341 342 if (mp_polymer->size() == 0) 343 return 0; 344 345 if (codeList.size() == 0) 346 return -1; 347 348 if (startIndex < 0) 349 return -1; 350 351 if (endIndex >= mp_polymer->size()) 352 return -1; 353 354 // Seed the routine by setting 'first' to the first motif in the 355 // codeList (in our example this is "Lys"). 356 357 QString firstCode = codeList.first(); 358 359 // And now iterate (starting from 'index') in the polymer 360 // sequence's list of monomers in search for a monomer having the 361 // proper code ("Lys"). 362 363 int iterIndex = startIndex; 364 365 while (iterIndex < endIndex) 366 { 367 const Monomer *monomer = monomerList.at(iterIndex); 368 369 if (monomer->code() != firstCode) 370 { 371 // The currently iterated code is not the one we 372 // search. So go one code further in the sequence. 373 374 ++iterIndex; 375 continue; 376 } 377 378 // If we are here, then that means that we actually found on 379 // monomer code in the sequence that matches the one we are 380 // looking for. 381 382 firstIndex = iterIndex; 383 noGood = false; 384 385 // Now that we have anchored our search at firstIndex in the 386 // polymer sequence, continue with next monomer and check if 387 // it matches the next monomer in the motif we are looking 388 // for. 389 390 for (int iter = 1; iter < codeList.size(); ++iter) 391 { 392 if (iterIndex + iter >= endIndex) 393 { 394 noGood = true; 395 break; 396 } 397 398 QString nextCode = codeList.at(iter); 399 400 monomer = monomerList.at(iterIndex + iter); 401 402 if (monomer->code() == nextCode) 403 continue; 404 else 405 { 406 noGood = true; 407 break; 408 } 409 } 410 // End of 411 // for (int iter = 1; iter < codeList.size(); ++iter) 412 413 if (noGood) 414 { 415 ++iterIndex; 416 continue; 417 } 418 else 419 { 420 return firstIndex; 421 } 422 } 423 // End of 424 // while (iterIndex < monomerList.size()) 425 426 return -1; 427 } 428 429 430 bool accountCleaveRule(CleaveRule * cleaveRule,CleaveOligomer * oligomer)431 Cleaver::accountCleaveRule(CleaveRule *cleaveRule, 432 CleaveOligomer *oligomer) 433 { 434 Q_ASSERT(cleaveRule); 435 Q_ASSERT(oligomer); 436 437 const QList<Atom *> &refList = 438 oligomer->polymer()->polChemDef()->atomList(); 439 440 // For each Coordinates element in the oligomer, we have to ensure 441 // we apply the formula(s) that is/are required. 442 443 int coordinatesCount = static_cast<CoordinateList *>(oligomer)->size(); 444 445 // qDebug() << __FILE__ << __LINE__ 446 // << "Coordinates count:" << coordinatesCount; 447 448 for (int iter = 0; iter < coordinatesCount; ++iter) 449 { 450 Coordinates *coordinates = 451 static_cast<CoordinateList *>(oligomer)->at(iter); 452 453 if (!cleaveRule->leftCode().isEmpty()) 454 { 455 // The formula has to be there. 456 457 Formula ruleFormula = cleaveRule->leftFormula(); 458 Q_ASSERT(!ruleFormula.text().isEmpty()); 459 460 // What is the monomer at the left end of current oligomer ? 461 const Monomer &monomer = oligomer->atLeftEnd(); 462 463 if (monomer.code() == cleaveRule->leftCode()) 464 { 465 // qDebug() << __FILE__ << __LINE__ 466 // << "Matched left code:" << cleaveRule->leftCode(); 467 468 // But, this is not going to be real true, if the 469 // monomer is acutally the left-end monomer of the 470 // polymer sequence, because then that would mean that 471 // there was no cleavage on this monomer, thus no rule 472 // to apply. 473 474 if (!coordinates->start()) 475 { 476 // The monomer is not the left-end monomer, so the 477 // match is real. Account for the formula ! 478 479 if (!ruleFormula.accountMasses(refList, oligomer)) 480 return false; 481 } 482 } 483 } 484 485 if (!cleaveRule->rightCode().isEmpty()) 486 { 487 // The formula has to be there. 488 489 Formula ruleFormula = cleaveRule->rightFormula(); 490 Q_ASSERT(!ruleFormula.text().isEmpty()); 491 492 // What is the monomer at the right end of current oligomer ? 493 const Monomer &monomer = oligomer->atRightEnd(); 494 495 if (monomer.code() == cleaveRule->rightCode()) 496 { 497 // qDebug() << __FILE__ << __LINE__ 498 // << "Matched right code:" << cleaveRule->rightCode(); 499 500 // But, this is not going to be real true, if the 501 // monomer is acutally the right-end monomer of the 502 // polymer sequence, because then that would mean that 503 // there was no cleavage on this monomer, thus no rule 504 // to apply. 505 506 if (coordinates->end() != mp_polymer->size() - 1) 507 { 508 // The monomer is not the right-end monomer, so 509 // the match is real. Account for the formula ! 510 511 if (!ruleFormula.accountMasses(refList, oligomer)) 512 return false; 513 } 514 } 515 } 516 } 517 518 return true; 519 } 520 521 522 int cleavePartial(int partialCleavageValue)523 Cleaver::cleavePartial(int partialCleavageValue) 524 { 525 bool oligomerIsPolymer = false; 526 527 int iter = 0; 528 529 static int leftIndex = 0; 530 static int rightIndex = 0; 531 532 Q_ASSERT(partialCleavageValue >= 0); 533 534 OligomerList partialOligomerList; 535 536 // Since version 2.3.0, the cleavage might be performed on a 537 // selected portion of a sequence only, not necessarily on the 538 // whole polymer sequence. We have to know these values because 539 // when we create oligomer we'll have to set the coordinates of 540 // the monomers correctly. 541 542 CoordinateList coordinateList = m_calcOptions.coordinateList(); 543 Coordinates *coordinates = coordinateList.first(); 544 545 int startIndex = coordinates->start(); 546 int endIndex = coordinates->end(); 547 548 leftIndex = startIndex; 549 rightIndex = 0; 550 551 // Iterate in the array of indices where the cleavages should occur. 552 553 for (iter = 0; iter < m_cleaveIndexList.size(); ++iter) 554 { 555 // Make sure, if the partial cleavage is very large, for 556 // example, that it will not lead us to access the polymer 557 // sequence at a position larger than its upper boundary. 558 559 // Imagine cutting a polymer with only one Met residue with 560 // cyanogen bromide: m_cleaveIndexList will contain a single 561 // element: the index at which the methionine occurs in the 562 // polymer sequence(and there is a single one). Now, Imagine 563 // that we are asked to perform a cleavage with 564 // 'partialCleavageValue' of 2. The way we do it is that we fetch the 565 // index in the list of cleavage indices(m_cleaveIndexList) two 566 // positions farther than the position we are iterating: 567 568 // int partCleave = iter + partialCleavageValue; 569 570 // Now, if m_cleaveIndexList contains a single element, asking 571 // for this m_cleaveIndexList.at(iter + partialCleavageValue) will 572 // go out of the boundaries of the list, since it has a single 573 // item and partialCleavageValue is 2. This is what we are willing to 574 // avoid. 575 576 int partCleave = iter + partialCleavageValue; 577 578 if (partCleave >= m_cleaveIndexList.size()) 579 { 580 if (iter == 0) 581 oligomerIsPolymer = true; 582 583 break; 584 } 585 586 rightIndex = m_cleaveIndexList.at(partCleave); 587 588 QString name = QString("%1#%2") 589 .arg(partialCleavageValue) 590 .arg(iter + 1); 591 592 CleaveOligomer *oligomer = 593 new CleaveOligomer(mp_polymer, 594 name, m_cleaveOptions.name(), 595 Ponderable(), 596 leftIndex, rightIndex, 597 partialCleavageValue, m_calcOptions); 598 599 partialOligomerList.append(oligomer); 600 601 leftIndex = m_cleaveIndexList.at(iter) + 1; 602 } 603 // End of 604 // for (int iter = 0; iter < m_cleaveIndexList.size(); iter=+) 605 606 // Do not forget the right-end oligomer ! And be sure to determine 607 // what's its real left end index ! 608 609 if (oligomerIsPolymer) 610 leftIndex = startIndex; 611 else 612 leftIndex = m_cleaveIndexList.at(--iter) + 1; 613 614 // 'iter' is used to construct the name of the oligomer, so we have 615 // to increment it once because we did not have the opportunity to 616 // increment it between the last but one oligomer and this one. 617 618 ++iter; 619 620 // Remove 1 because the oligomer is described using indices and not 621 // positions. 622 623 rightIndex = endIndex; 624 625 QString name = QString("%1#%2") 626 .arg(partialCleavageValue) 627 .arg(iter + 1); 628 629 CleaveOligomer *oligomer = 630 new CleaveOligomer(mp_polymer, 631 name, m_cleaveOptions.name(), 632 Ponderable(), 633 leftIndex, rightIndex, 634 partialCleavageValue, m_calcOptions); 635 636 partialOligomerList.append(oligomer); 637 638 // At this point all the skeleton oligomers have been computed for 639 // the given partialCleavageValue. We still have to perform the 640 // cross-link analysis prior to both calculate the masses and 641 // perform the ionization of all the generated oligomers. Note that 642 // making cross-link analysis is only useful in case the cleavage is 643 // full(partialCleavageValue == 0). 644 645 if (!partialCleavageValue) 646 { 647 if (m_calcOptions.monomerEntities() & MXT_MONOMER_CHEMENT_CROSS_LINK) 648 { 649 if (analyzeCrossLinks(&partialOligomerList) == -1) 650 { 651 return false; 652 } 653 } 654 } 655 656 // Finally, we can now perform the mass calculations and the 657 // ionization. We will use each oligomer in the oligomerList as a 658 // template for creating new oligomers(with different z values) and 659 // all the new oligomers will be appended to waitOligomerList. Each 660 // time a template oligomer will have been used, it will be removed 661 // from oligomerList. Once all the oligomers in oligomerList will 662 // have been used, and thus removed, all the newly allocated 663 // oligomers in waitOligomerList will be moved to oligomerList. 664 665 OligomerList waitOligomerList; 666 667 while (partialOligomerList.size()) 668 { 669 CleaveOligomer *iterOligomer = 670 static_cast<CleaveOligomer *>(partialOligomerList.takeFirst()); 671 672 // We do not ask that the oligomer be ionized yet, because we 673 // have to first account for potential cleavage rules! Thus we 674 // pass an uninitialized ionization rule with IonizeRule(). 675 // iterOligomer->calculateMasses(*mp_calcOptions, 676 // *mp_ionizeRule); This was a bug in the release versions up 677 // to 1.6.1. 678 iterOligomer->calculateMasses(&m_calcOptions); 679 680 // At this point we should test if the oligomer has to be 681 // processed using cleavage rules. 682 683 for(int jter = 0; jter < m_cleaveOptions.ruleList()->size(); ++jter) 684 { 685 // Note that the accounting of the cleavage rule is 686 // performed as if the oligomer was charged 1. This is why 687 // we have to ionize the oligomer only after we have 688 // completed the determination of its atomic composition. 689 690 CleaveRule *cleaveRule = m_cleaveOptions.ruleList()->at(jter); 691 692 // qDebug() << __FILE__ << __LINE__ 693 // << "Accounting for cleaverule:" 694 // << cleaveRule->name(); 695 696 // qDebug() << __FILE__ << __LINE__ 697 // << "Oligomer mono mass before:" << iterOligomer->mono(); 698 699 if (!accountCleaveRule(cleaveRule, iterOligomer)) 700 return -1; 701 702 // qDebug() << __FILE__ << __LINE__ 703 // << "Oligomer mono mass after:" << iterOligomer->mono(); 704 } 705 706 // At this point we can finally ionize the oligomer ! Remember 707 // that we have to ionize the oligomer as expected in the 708 // cleavage options. Because the ionization changes the values 709 // in the oligomer, and we need a new oligomer each time, we 710 // duplicate the oligomer each time we need it. 711 712 int startIonizeLevel = m_cleaveOptions.startIonizeLevel(); 713 int endIonizeLevel = m_cleaveOptions.endIonizeLevel() + 1; 714 IonizeRule ionizeRule(m_ionizeRule); 715 716 for(int kter = startIonizeLevel; kter < endIonizeLevel; ++kter) 717 { 718 ionizeRule.setLevel(kter); 719 720 CleaveOligomer *newOligomer = new CleaveOligomer(*iterOligomer); 721 722 if (newOligomer->ionize(ionizeRule) == -1) 723 { 724 delete newOligomer; 725 726 return -1; 727 } 728 729 // The name was set already during the creation of the 730 // template oligomer. All we have to add to the name is the 731 // ionization level. 732 733 QString name = iterOligomer->name() + 734 QString("#z=%3").arg(newOligomer->charge()); 735 736 newOligomer->setName(name); 737 738 waitOligomerList.append(newOligomer); 739 } 740 741 // We can delete the template oligomer that was already removed 742 // from the oligomerList(use of QList::takeFirst()). 743 delete iterOligomer; 744 } 745 746 // At this point we should transfer all the oligomers from the 747 // waitOligomerList to the initial oligomerList. 748 749 while (waitOligomerList.size()) 750 { 751 Oligomer *iterOligomer = waitOligomerList.takeFirst(); 752 753 partialOligomerList.append(iterOligomer); 754 } 755 756 // Finally transfer all the oligomers generated for this partial 757 // cleavage to the list of ALL the oligomers. But before making 758 // the transfert, compute the elemental composition and store it 759 // as a property object. 760 761 int oligomerCount = partialOligomerList.size(); 762 763 while (partialOligomerList.size()) 764 { 765 Oligomer *iterOligomer = partialOligomerList.takeFirst(); 766 767 // Elemental formula 768 QString *text = new QString(iterOligomer->elementalComposition()); 769 StringProp *prop = new StringProp("ELEMENTAL_COMPOSITION", text); 770 iterOligomer->appendProp(static_cast<Prop *>(prop)); 771 772 mp_oligomerList->append(iterOligomer); 773 } 774 775 return oligomerCount; 776 } 777 778 779 int analyzeCrossLinks(OligomerList * oligomerList)780 Cleaver::analyzeCrossLinks(OligomerList *oligomerList) 781 { 782 Q_ASSERT(oligomerList); 783 784 QList<Oligomer *> crossLinkedOligomerList; 785 786 // General overview: 787 788 // Iterate in the polymer's list of cross-links and for each 789 // cross-link find the oligomer that contains the first monomer 790 // involved in the cross-link. This first found oligomer should 791 // serve as a seed to pull-down all the oligomers cross-linked to 792 // it. 793 794 const CrossLinkList &crossLinkList = mp_polymer->crossLinkList(); 795 796 for (int iter = 0; iter < crossLinkList.size(); ++iter) 797 { 798 CrossLink *crossLink = crossLinkList.at(iter); 799 800 // With that crossLink, find an oligomer that encompasses the 801 // first monomer of the cross-link. 802 803 const Monomer *firstMonomer = crossLink->firstMonomer(); 804 805 Q_ASSERT(firstMonomer); 806 807 // What oligomer does encompass that monomer ? 808 809 int foundIndex = 0; 810 811 Oligomer *firstOligomer = 812 oligomerList->findOligomerEncompassing(firstMonomer, &foundIndex); 813 814 if (firstOligomer) 815 { 816 // At this point we should turn this oligomer into a 817 // cross-linked oligomer, so that we can continue performing its 818 // cross-link analysis. To do that we allocate a list of 819 // oligomers for this cross-linked oligomer, were we'll store 820 // this first oligomer and then all the "pulled-down" oligomers. 821 822 // Remove the cross-link from the main list of oligomers so 823 // that we do not stumble upon it in the next analysis 824 // steps. 825 oligomerList->removeAt(foundIndex); 826 827 // Set the cross-linked oligomer apart. 828 crossLinkedOligomerList.append(firstOligomer); 829 830 // Finally deeply scrutinize the oligomer. 831 analyzeCrossLinkedOligomer(firstOligomer, oligomerList); 832 } 833 else 834 { 835 // qDebug() << __FILE__ << __LINE__ 836 // << "Cross-link at index" << iter 837 // << "did not find any oligomer for its first monomer " 838 // "partner"; 839 } 840 } 841 842 // At this point we have terminated analyzing all the oligomers 843 // for the partial cleavage. All we have to do is move all the 844 // crossLinked oligomers from the crossLinkedOligomerList to 845 // oligomerList. While doing so make sure that the m_calcOptions 846 // datum has correct CoordinateList data, as these data will be 847 // required later, typically to calculate the elemental formula of 848 // the oligomer. 849 850 while (crossLinkedOligomerList.size()) 851 { 852 Oligomer *oligomer = crossLinkedOligomerList.takeAt(0); 853 854 oligomer->updateCalcOptions(); 855 856 oligomerList->append(oligomer); 857 } 858 859 crossLinkedOligomerList.clear(); 860 861 // Return the number of cross-linked/non-cross-linked oligomers 862 // alltogether. 863 864 return oligomerList->size(); 865 } 866 867 868 int analyzeCrossLinkedOligomer(Oligomer * oligomer,OligomerList * oligomerList)869 Cleaver::analyzeCrossLinkedOligomer(Oligomer *oligomer, 870 OligomerList *oligomerList) 871 { 872 Q_ASSERT(oligomer); 873 Q_ASSERT(oligomerList); 874 875 const CrossLinkList &crossLinkList = mp_polymer->crossLinkList(); 876 877 OligomerList clearanceOligomerList; 878 879 // 'oligomer' is the first oligomer in the cross-link series of 880 // oligomers. It is the "seeding" oligomer with which to pull-down 881 // all the others. Prepend to its name the "cl-" string to let it 882 // know it is cross-linked. 883 884 QString name = oligomer->name(); 885 name.prepend("cl-"); 886 oligomer->setName(name); 887 888 // Iterate in the 'oligomer' and for each monomer get any 889 // cross-linked oligomer out of the list of cross-links. 890 891 for (int iter = oligomer->startIndex(); 892 iter < oligomer->endIndex() + 1; ++iter) 893 { 894 const Monomer *monomer = mp_polymer->at(iter); 895 896 // What crossLinks do involve this monomer ? 897 898 QList<int> crossLinkIndices; 899 900 int ret = 901 crossLinkList.crossLinksInvolvingMonomer(monomer, 902 &crossLinkIndices); 903 904 if (ret) 905 { 906 // At least one cross-link involves the monomer currently 907 // iterated in the oligomer being analysed. 908 909 int index = 0; 910 911 foreach(index, crossLinkIndices) 912 { 913 CrossLink *crossLink = crossLinkList.at(index); 914 915 // qDebug() << __FILE__ << __LINE__ 916 // << crossLink->name(); 917 918 // First off, we can add the cross-link to the list of 919 // cross-links of the oligomer(we'll need them to be 920 // able to perform mass calculations). Note that this is 921 // only copying the pointer to the actual cross-link in 922 // the polymer's list of cross-links. Note also that a 923 // cross-link might not be found more than once(the 924 // call below first checks that the cross-link is not 925 // already in the list). 926 927 if (!oligomer->addCrossLink(crossLink)) 928 { 929 // qDebug() << __FILE__ << __LINE__ 930 // << "The cross-link:" 931 // << crossLink->name() 932 // << "was already in the" 933 // << oligomer 934 // << "oligomer's list of cross-links: " 935 // "not duplicated."; 936 } 937 else 938 { 939 // qDebug() << __FILE__ << __LINE__ 940 // << "The cross-link:" 941 // << crossLink->name() 942 // << "was added to the" 943 // << oligomer 944 // << "oligomer's list of cross-links."; 945 } 946 947 const Monomer *iterMonomer = 0; 948 949 foreach(iterMonomer, *(crossLink->monomerList())) 950 { 951 // qDebug() << __FILE__ << __LINE__ 952 // << iterMonomer->name(); 953 954 int foundIndex = 0; 955 956 Oligomer *foundOligomer = 957 oligomerList->findOligomerEncompassing(iterMonomer, 958 &foundIndex); 959 960 if (foundOligomer) 961 { 962 // qDebug() << __FILE__ << __LINE__ 963 // << foundOligomer->name() << foundIndex; 964 965 // One oligomer in the original oligomer list 966 // encompasses a monomer that seems to be 967 // cross-linked to the 'monomer' being iterated 968 // in in the currently analyzed oligomer. Move 969 // that oligomer to the clearance list of 970 // oligomer that will need to be further 971 // analyzed later. 972 973 oligomerList->removeAt(foundIndex); 974 975 clearanceOligomerList.append(foundOligomer); 976 977 // Update the name of the oligomer with the name 978 // of the new foundOligomer. 979 980 QString name = QString("%1+%2") 981 .arg(oligomer->name()) 982 .arg(foundOligomer->name()); 983 oligomer->setName(name); 984 } 985 } 986 } 987 // End of 988 // foreach(index, crossLinkIndices) 989 } 990 } 991 992 // At this point we have one oligomer which we know is cross-linked 993 // at least once(with another oligomer or the cross-link is between 994 // two or more monomers in the same oligomer, think cyan fluorescent 995 // protein). If monomers in that same oligomer were cross-linked to 996 // other monomers in other oligomers, then these oligomers should by 997 // now have been moved from the original list of oligomers 998 //(oligomerList) to the clearance list of oligomers 999 //(clearanceOligomerList). We have to iterate in each oligomer of that 1000 // clearance list and for each of its monomers, check if it has a 1001 // cross-link to any oligomer still in the original oligomerList 1002 //(this is what I call "pull-down" stuff). Found oligomers are 1003 // appended to the clearanceOligomerList. 1004 1005 while (clearanceOligomerList.size()) 1006 { 1007 Oligomer *iterOligomer = clearanceOligomerList.first(); 1008 1009 for(int iter = iterOligomer->startIndex(); 1010 iter < iterOligomer->endIndex() + 1; ++iter) 1011 { 1012 const Monomer *monomer = mp_polymer->at(iter); 1013 1014 // qDebug() << __FILE__ << __LINE__ 1015 // << monomer->name(); 1016 1017 // What crossLinks do involve this monomer ? 1018 1019 QList<int> crossLinkIndices; 1020 1021 int ret = 1022 crossLinkList.crossLinksInvolvingMonomer(monomer, 1023 &crossLinkIndices); 1024 1025 if (ret) 1026 { 1027 // At least one cross-link involves the monomer currently 1028 // iterated in the iterOligomer being analysed. 1029 1030 int index = 0; 1031 1032 foreach(index, crossLinkIndices) 1033 { 1034 CrossLink *crossLink = crossLinkList.at(index); 1035 1036 // qDebug() << __FILE__ << __LINE__ 1037 // << crossLink->name(); 1038 1039 // First off, we can add the cross-link to the list of 1040 // cross-links of the oligomer(we'll need them to be 1041 // able to perform mass calculations). Note that this is 1042 // only copying the pointer to the actual cross-link in 1043 // the polymer's list of cross-links. Note also that a 1044 // cross-link might not be found more than once(the 1045 // call below first checks that the cross-link is not 1046 // already in the list). 1047 1048 if (!oligomer->addCrossLink(crossLink)) 1049 { 1050 // qDebug() << __FILE__ << __LINE__ 1051 // << "The cross-link:" 1052 // << crossLink->name() 1053 // << "was already in the" 1054 // << oligomer 1055 // << "oligomer's list of cross-links: " 1056 // "not duplicated."; 1057 } 1058 else 1059 { 1060 // qDebug() << __FILE__ << __LINE__ 1061 // << "The cross-link:" 1062 // << crossLink->name() 1063 // << "was added to the" 1064 // << oligomer 1065 // << "oligomer's list of cross-links."; 1066 } 1067 1068 const Monomer *iterMonomer = 0; 1069 1070 foreach(iterMonomer, *(crossLink->monomerList())) 1071 { 1072 // qDebug() << __FILE__ << __LINE__ 1073 // << iterMonomer->name(); 1074 1075 int foundIndex = 0; 1076 1077 Oligomer *foundOligomer = 1078 oligomerList->findOligomerEncompassing(iterMonomer, 1079 &foundIndex); 1080 1081 if (foundOligomer) 1082 { 1083 // qDebug() << __FILE__ << __LINE__ 1084 // << foundOligomer->name() << foundIndex; 1085 1086 // One oligomer in the original oligomer list 1087 // encompasses a monomer that seems to be 1088 // cross-linked to the 'monomer' being iterated 1089 // in in the currently analyzed oligomer. Move 1090 // that oligomer to the clearance list of 1091 // oligomer that will need to be further 1092 // analyzed later. 1093 1094 oligomerList->removeAt(foundIndex); 1095 1096 clearanceOligomerList.append(foundOligomer); 1097 1098 // Update the name of the oligomer with the name 1099 // of the new foundOligomer. 1100 1101 QString name = QString("%1+%2") 1102 .arg(oligomer->name()) 1103 .arg(foundOligomer->name()); 1104 oligomer->setName(name); 1105 } 1106 } 1107 } 1108 // End of 1109 // foreach(index, crossLinkIndices) 1110 } 1111 // End of(ret) ie cross-links involved monomer 1112 } 1113 // End of 1114 // for (int iter = iterOligomer->startIndex(); 1115 // iter < iterOligomer->endIndex() + 1; ++iter) 1116 1117 // At this point this quarantinized oligomer might be removed 1118 // from the clearance clearanceOligomerList and its coordinates 1119 // be appended to the 'oligomer' list of coordinates. Then, the 1120 // quanrantinized oligomer might be destroyed. 1121 1122 clearanceOligomerList.removeFirst(); 1123 1124 oligomer->appendCoordinates(iterOligomer); 1125 1126 delete iterOligomer; 1127 } 1128 1129 // At this point, all the oligomers in the clearance oligomer list 1130 // have all been dealt with, return the number of cross-linked 1131 // oligomers in this oligomer. 1132 1133 // for (int iter = 0; iter < oligomer->crossLinkList()->size(); ++iter) 1134 // qDebug() << __FILE__ << __LINE__ 1135 // << "Finished for oligomer:" << 1136 // oligomer->crossLinkList()->at(iter)->name(); 1137 1138 1139 return static_cast<CoordinateList *>(oligomer)->size(); 1140 } 1141 1142 1143 void emptyOligomerList()1144 Cleaver::emptyOligomerList() 1145 { 1146 while (mp_oligomerList->size()) 1147 { 1148 delete mp_oligomerList->takeFirst(); 1149 } 1150 } 1151 1152 } // namespace massXpert 1153