1 /* massXpert - the true massist's program. 2 3 Copyright(C) 2006,2007 Filippo Rusconi 4 5 http://www.massxpert.org/massXpert 6 7 This file is part of the massXpert project. 8 9 The massxpert project is the successor to the "GNU polyxmass" 10 project that is an official GNU project package(see 11 www.gnu.org). The massXpert project is not endorsed by the GNU 12 project, although it is released ---in its entirety--- under the 13 GNU General Public License. A huge part of the code in massXpert 14 is actually a C++ rewrite of code in GNU polyxmass. As such 15 massXpert was started at the Centre National de la Recherche 16 Scientifique(FRANCE), that granted me the formal authorization to 17 publish it under this Free Software License. 18 19 This software is free software; you can redistribute it and/or 20 modify it under the terms of the GNU General Public 21 License version 3, as published by the Free Software Foundation. 22 23 24 This software is distributed in the hope that it will be useful, 25 but WITHOUT ANY WARRANTY; without even the implied warranty of 26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 27 General Public License for more details. 28 29 You should have received a copy of the GNU General Public License 30 along with this software; if not, write to the 31 32 Free Software Foundation, Inc., 33 34 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 35 */ 36 37 38 /////////////////////// Qt includes 39 #include <QChar> 40 #include <QString> 41 42 43 /////////////////////// Local includes 44 #include "formula.hpp" 45 46 47 namespace massXpert 48 { 49 50 //! Constructs a formula initialized with formula and ... 51 /*! Initialization of the formula is done by \p formula. Upon 52 construction of the formula, no parsing occurs. 53 54 \p formula gets simply copied into the member formula. 55 56 \param formula formula in the form of a string. Defaults to a null 57 string. 58 */ Formula(const QString & formula)59 Formula::Formula(const QString &formula) 60 { 61 if (!formula.isEmpty()) 62 m_formula = formula; 63 } 64 65 66 //! Constructs a copy of \p other. 67 /*! 68 69 \param other formula to be used as a mold. 70 */ Formula(const Formula & other)71 Formula::Formula(const Formula &other) 72 : m_formula(other.m_formula), 73 m_plusFormula(other.m_plusFormula), 74 m_minusFormula(other.m_minusFormula) 75 { 76 AtomCount *atomCount = 0; 77 78 for (int iter = 0 ; iter < other.m_atomCountList.size(); ++iter) 79 { 80 atomCount = new AtomCount(*other.m_atomCountList.at(iter)); 81 82 m_atomCountList.append(atomCount); 83 } 84 } 85 86 87 //! Destroys the formula. ~Formula()88 Formula::~Formula() 89 { 90 // Remove all the items in the atomcountList of dynamically 91 // allocated Atomcount instances. 92 93 while(!m_atomCountList.isEmpty()) 94 delete m_atomCountList.takeFirst(); 95 } 96 97 98 //! Returns the list of atom count objects. 99 /*! 100 \return The list of atom count objects. 101 */ 102 const QList<AtomCount *> & atomCountList() const103 Formula::atomCountList() const 104 { 105 return m_atomCountList; 106 } 107 108 109 //! Creates a new formula initialized using \p this. 110 /*! The initialization involved copying all the data from \p this, 111 including the atom count lists. 112 113 \return The new formula, which should be deleted when no more in 114 use. 115 */ 116 Formula * clone() const117 Formula::clone() const 118 { 119 Formula *other = new Formula(*this); 120 121 return other; 122 } 123 124 //! Modifies \p other to be identical to \p this. 125 /*! 126 127 \param other formula. 128 */ 129 void clone(Formula * other) const130 Formula::clone(Formula *other) const 131 { 132 if (other == this) 133 return; 134 135 AtomCount *atomCount = 0; 136 137 Q_ASSERT(other); 138 139 other->m_formula = m_formula; 140 other->m_plusFormula = m_plusFormula; 141 other->m_minusFormula = m_minusFormula; 142 143 while(!other->m_atomCountList.isEmpty()) 144 delete other->m_atomCountList.takeFirst(); 145 146 for (int iter = 0 ; iter < m_atomCountList.size(); ++iter) 147 { 148 atomCount = new AtomCount(); 149 150 m_atomCountList.at(iter)->clone(atomCount); 151 152 other->m_atomCountList.append(atomCount); 153 } 154 } 155 156 157 //! Modifies \p this to be identical to \p other. 158 /*! 159 160 \param other formula to be used as a mold. 161 */ 162 void mold(const Formula & other)163 Formula::mold(const Formula &other) 164 { 165 if (&other == this) 166 return; 167 168 AtomCount *atomCount = 0; 169 170 m_formula = other.m_formula; 171 m_plusFormula = other.m_plusFormula; 172 m_minusFormula = other.m_minusFormula; 173 174 while(!m_atomCountList.isEmpty()) 175 delete m_atomCountList.takeFirst(); 176 177 for (int iter = 0 ; iter < other.m_atomCountList.size(); ++iter) 178 { 179 atomCount = new AtomCount(); 180 181 atomCount->mold(*other.m_atomCountList.at(iter)); 182 183 m_atomCountList.append(atomCount); 184 } 185 } 186 187 188 //! Assigns \p other to \p this formula. 189 /*! 190 \param other formula. 191 192 \return true if the formulas are identical, false otherwise. 193 */ 194 Formula & operator =(const Formula & other)195 Formula::operator =(const Formula &other) 196 { 197 if (&other != this) 198 mold(other); 199 200 return *this; 201 } 202 203 204 //! Sets the formula. 205 /*! 206 207 \param formula formula initializer. 208 */ 209 void setFormula(const QString & formula)210 Formula::setFormula(const QString &formula) 211 { 212 m_formula = formula; 213 } 214 215 216 //! Sets the formula. 217 /*! 218 219 \param formula formula initializer. 220 */ 221 void setFormula(const Formula & formula)222 Formula::setFormula(const Formula &formula) 223 { 224 m_formula = formula.m_formula; 225 } 226 227 228 //! Returns the formula. 229 /*! 230 231 \return the formula as a string. 232 */ 233 QString text() const234 Formula::text() const 235 { 236 return m_formula; 237 } 238 239 240 void clear()241 Formula::clear() 242 { 243 m_formula.clear(); 244 m_plusFormula.clear(); 245 m_minusFormula.clear(); 246 247 while(!m_atomCountList.isEmpty()) 248 delete m_atomCountList.takeFirst(); 249 } 250 251 252 //! Sets the plus-formula. 253 /*! 254 255 \param formula formula initializer. 256 */ 257 void setPlusFormula(const QString & formula)258 Formula::setPlusFormula(const QString &formula) 259 { 260 m_plusFormula = formula; 261 } 262 263 264 //! Returns the plus-formula. 265 /*! 266 267 \return the plus-formula as a string. 268 */ 269 const QString & plusFormula() const270 Formula::plusFormula() const 271 { 272 return m_plusFormula; 273 } 274 275 276 //! Sets the minus-formula. 277 /*! 278 279 \param formula formula initializer. 280 */ 281 void setMinusFormula(const QString & formula)282 Formula::setMinusFormula(const QString &formula) 283 { 284 m_minusFormula = formula; 285 } 286 287 288 //! Returns the minus-formula. 289 /*! 290 291 \return the formula as a string. 292 */ 293 const QString & minusFormula() const294 Formula::minusFormula() const 295 { 296 return m_minusFormula; 297 } 298 299 300 //! Tests equality. 301 /*! The test only pertains to the formula(not the minus-/plus- 302 formulas). 303 304 \param other formula to be compared with \p this. 305 306 \return true if the formulas are identical, false otherwise. 307 */ 308 bool operator ==(const Formula & other) const309 Formula::operator ==(const Formula &other) const 310 { 311 return(m_formula == other.m_formula); 312 } 313 314 315 //! Tests inequality. 316 /*! The test only pertains to the formula(not the minus-/plus- 317 formulas). 318 319 \param other formula to be compared with \p this. 320 321 \return true if the formulas differ, false otherwise. 322 */ 323 bool operator !=(const Formula & other) const324 Formula::operator !=(const Formula &other) const 325 { 326 return(m_formula != other .m_formula); 327 } 328 329 330 //! Tells the actions found in the formula. 331 /*! Following analysis of the \p formula argument, this function will 332 be able to tell if the formula contains only '+'-associated elements 333 or also '-'-associated elements. 334 335 If a formula contains no sign at all, then it is considered to 336 contain only '+'-associated member. As soon as one member is 337 associated with a '-' action, the minus actions prevails. 338 339 This function is used to quickly have an indication if the 340 splitParts() function is to be run or if it is not necessary. 341 342 \param formula formula to report the actions about. 343 344 \return '+' if no '-' action was found, '-' otherwise. 345 346 \sa splitParts(int times, bool store, bool reset) 347 */ 348 QChar actions(const QString & formula) const349 Formula::actions(const QString &formula) const 350 { 351 int minusCount = formula.count('-', Qt::CaseInsensitive); 352 353 return(minusCount == 0 ? '+' : '-'); 354 } 355 356 357 //! Tells the actions found in the formula. 358 /*! Following analysis of the \p formula argument, this function will 359 be able to tell if the formula contains only '+'-associated elements 360 or also '-'-associated elements. 361 362 If a formula contains no sign at all, then it is considered to 363 contain only '+'-associated member. As soon as one member is 364 associated with a '-' action, the minus actions prevails. 365 366 This function is used to quickly have an indication if the 367 splitParts() function is to be run or if it is not necessary. 368 369 \return '+' if no '-' action was found, '-' otherwise. 370 371 \sa splitParts(int times, bool store, bool reset) 372 */ 373 QChar actions() const374 Formula::actions() const 375 { 376 return actions(m_formula); 377 } 378 379 // Returns the number of removed characters. 380 int removeTitle()381 Formula::removeTitle() 382 { 383 int length = m_formula.length(); 384 385 // We want to remove the possibly-existing title for the formula, 386 // which is enclosed between `"' at the beginning of the line. 387 388 // One formula can be like this: 389 390 // "Decomposed adenine" C5H4N5 +H 391 392 // The "Decomposed adenine" is the title 393 // The C5H4N5 +H is the formula. 394 395 m_formula.remove(QRegExp("\".*\"")); 396 397 // Return the number of removed characters. 398 return (length - m_formula.length()); 399 } 400 401 402 // Returns the number of removed characters. 403 int removeSpaces()404 Formula::removeSpaces() 405 { 406 int length = m_formula.length(); 407 408 // We want to remove all the possibly-existing spaces. 409 410 m_formula.remove(QRegExp("\\s+")); 411 412 // Return the number of removed characters. 413 return (length - m_formula.length()); 414 } 415 416 417 //! Splits the formula according to its plus-/minus- actions. 418 /*! Analyzes the formula and separates all the minus components of 419 that formula from all the plus components. The different components 420 are set to their corresponding formula(minus formula and plus 421 formula). 422 423 At the end of the split work, each sub-formula(plus- and/or minus-) 424 is actually parsed for validity, using the reference atom list. 425 426 \param refList List of reference atoms. 427 428 \param times Number of times the formula has to be accounted 429 for. Defaults to 1. 430 431 \param store Indicates if AtomCount objects created during the 432 parsing of the sub-formulas generated by the split of the formula 433 have to be stored, or not. Defaults to false. 434 435 \param reset Indicates if the list of AtomCount objects has to 436 be reset before the splitParts work. This parameter may be useful in 437 case the caller needs to "accumulate" the accounting of the 438 formula. Defaults to false. 439 440 \return MXT_FORMULA_SPLIT_FAIL if the splitting failed, 441 MXT_FORMULA_SPLIT_PLUS if the components of the formula are all of 442 type plus, MXT_FORMULA_SPLIT_MINUS if all the components of the 443 formula are of type minus. The result value can be an OR'ing of 444 MXT_FORMULA_SPLIT_PLUS and MXT_FORMULA_SPLIT_MINUS. 445 */ 446 int splitParts(const QList<Atom * > & refList,int times,bool store,bool reset)447 Formula::splitParts(const QList<Atom *> &refList, 448 int times, 449 bool store, 450 bool reset) 451 { 452 QChar curChar; 453 QString tempFormula; 454 455 int result = 0; 456 457 bool wasParsingFormula = false; 458 bool shouldBeFormula = false; 459 bool wasMinusSign = false; 460 461 if(refList.isEmpty()) 462 qFatal("Fatal error at %s@%d. Aborting.",__FILE__, __LINE__); 463 464 // We are asked to put all the '+' components of the formula 465 // into corresponding formula and the same for the '-' components. 466 467 m_plusFormula.clear(); 468 m_minusFormula.clear(); 469 470 // Because the formula that we are analyzing might contain a title 471 // and spaces , we first remove these. But make a local copy of 472 // the member datum. 473 474 QString formula = m_formula; 475 476 // qDebug() << __FILE__ << __LINE__ 477 // << "splitParts before working:" 478 // << "m_formula:" << m_formula 479 // << "text" << Formula::text(); 480 481 // One formula can be like this: 482 483 // "Decomposed adenine" C5H4N5 +H 484 485 // The "Decomposed adenine" is the title 486 // The C5H4N5 +H is the formula. 487 488 formula.remove(QRegExp("\".*\"")); 489 490 // We want to remove all the possibly-existing spaces. 491 492 formula.remove(QRegExp("\\s+")); 493 494 // If the formula does not contain any '-' character, then we 495 // can approximate that all the formula is a '+' formula, that is a 496 // plusFormula: 497 498 if (actions() == '+') 499 { 500 m_plusFormula.append(formula); 501 502 // At this point we want to make sure that we have a correct 503 // formula. Remove all the occurrences of the '+' sign. 504 m_plusFormula.replace(QString("+"), QString("")); 505 506 if(m_plusFormula.length() > 0) 507 { 508 // qDebug() << __FILE__ << __LINE__ 509 // << "splitParts: with m_plusFormula:" 510 // << m_plusFormula; 511 512 if (!parse(refList, m_plusFormula, times, store, reset)) 513 return MXT_FORMULA_SPLIT_FAIL; 514 else 515 return MXT_FORMULA_SPLIT_PLUS; 516 } 517 } 518 519 // At this point, we truly have to iterate in the formula... 520 521 for (int iter = 0 ; iter < formula.length() ; ++iter) 522 { 523 curChar = formula.at(iter); 524 // qDebug() << "curChar:" << curChar; 525 526 if(curChar == '+' || curChar == '-') 527 { 528 if (shouldBeFormula) 529 return MXT_FORMULA_SPLIT_FAIL; 530 531 if (wasParsingFormula) 532 { 533 // We were parsing a formula, wich means that we are 534 // ending that formula now, by starting another one. For 535 // example, if we had "-CH3+COOH" we would typically be 536 // at the '+' after having parsed -CH3. So we now have 537 // to account for that latter formula. 538 539 if(wasMinusSign) 540 m_minusFormula.append(tempFormula); 541 else 542 m_plusFormula.append(tempFormula); 543 544 // Reinit the tempFormula for next round. 545 tempFormula.clear(); 546 547 // Now set proper bool values for next round. 548 shouldBeFormula = true; 549 wasMinusSign =(curChar == '-' ? true : false); 550 551 continue; 552 } 553 else 554 { 555 wasMinusSign =(curChar == '-' ? true : false); 556 shouldBeFormula = true; 557 558 continue; 559 } 560 } 561 else 562 { 563 // We are parsing either a digit or an alphabetical 564 // character : we just append it to the tempFormula: 565 tempFormula.append(curChar); 566 567 wasParsingFormula = true; 568 569 // We do not necessarily have to expect another formula 570 // component at next round, admitting we were on the 571 // nitrogen atom of CH3CN: 572 shouldBeFormula = false; 573 574 continue; 575 } 576 } // End for (int iter = 0 ; iter < formula.length() ; ++iter) 577 578 // At this point the loop was finished so we might have something 579 // interesting cooking: 580 581 if (wasParsingFormula && tempFormula.length() > 0) 582 { 583 if(wasMinusSign) 584 m_minusFormula.append(tempFormula); 585 else 586 m_plusFormula.append(tempFormula); 587 } 588 589 // At this point we want to make sure that we have a correct 590 // formula. First reset the atomcount stuff if required. 591 592 if (reset) 593 { 594 while(!m_atomCountList.isEmpty()) 595 delete m_atomCountList.takeFirst(); 596 } 597 598 599 // qDebug() << __FILE__ << __LINE__ 600 // << "splitParts:" 601 // << "right after splitting:" 602 // << "m_formula:" << m_formula 603 // << "text" << text(); 604 605 606 // Now that we have reset if required the atomCountList, we need not 607 // and we must not reset during the parsing below, otherwise if we 608 // have -H+H3PO4, then we'll compute +H3PO4 first, then we compute 609 // -H with reset to true : the +H3PO4 component is destroyed! 610 611 if (m_plusFormula.length() > 0) 612 { 613 bool res = parse(refList, m_plusFormula, times, 614 store, false /* reset */); 615 616 // qDebug() << __FILE__ << __LINE__ 617 // << "splitParts:" 618 // << "right after parse of m_plusFormula:" 619 // << m_plusFormula 620 // << "m_formula:" << m_formula 621 // << "text" << text(); 622 623 if(!res) 624 return MXT_FORMULA_SPLIT_FAIL; 625 else 626 result = MXT_FORMULA_SPLIT_PLUS; 627 } 628 629 630 if (m_minusFormula.length() > 0) 631 { 632 bool res = parse(refList, m_minusFormula, -times, 633 store, false /* reset */); 634 635 // qDebug() << __FILE__ << __LINE__ 636 // << "splitParts:" 637 // << "right after parse of m_minusFormula:" 638 // << m_minusFormula 639 // << "m_formula:" << m_formula 640 // << "text" << text(); 641 642 if(!res) 643 return MXT_FORMULA_SPLIT_FAIL; 644 else 645 result |= MXT_FORMULA_SPLIT_MINUS; 646 } 647 648 649 // qDebug() << __FILE__ << __LINE__ 650 // << formula.toAscii() << "-->" 651 // << "(+)" << m_plusFormula.toAscii() 652 // << "(-)" << m_minusFormula.toAscii(); 653 654 return result; 655 } 656 657 658 //! Parses the \p formula using the reference atom list. 659 /*! Upon parsing of the formula, a list of AtomCount objects are 660 created in order to be able to account for the mass of the formula. 661 662 \param refList List of reference atoms. 663 664 \param formula Formula to parse. 665 666 \param times Number of times that the formula should be accounted 667 for. Default value is 1. 668 669 \param store Indicates if AtomCount objects created during the 670 parsing of the formula have to be stored, or not. Default value is 671 false. 672 673 \param reset Indicates if AtomCount objects created during the 674 parsing of the formula have to be destroyed before doing another 675 parsing. This parameter is interesting if the caller needs to 676 "accumulate" the accounting of the formula. Default value is false. 677 678 \return true if parsing succeeded, false otherwise. 679 */ 680 bool parse(const QList<Atom * > & refList,const QString & formula,int times,bool store,bool reset)681 Formula::parse(const QList<Atom *> &refList, 682 const QString &formula, 683 int times, 684 bool store, 685 bool reset) 686 { 687 QChar curChar; 688 QString parsedCount; 689 QString parsedSymbol; 690 AtomCount atomCount; 691 692 bool wasDigit = false; 693 bool gotUpper = false; 694 695 Q_ASSERT(refList.size()); 696 697 // The formula member is a QString that should hold the formula 698 // according to this typical schema: "H2O"(water). That means we 699 // only want letters(Upper and lower case and number). 700 701 // The member atomCountList might be reset before starting, or if 702 // !reset, then the new atom counts are added to the ones 703 // preexisting. 704 705 // The formula should thus not be empty, otherwise there is nothing 706 // to do. But it is not an error that the formula be empty. 707 if (formula.length() == 0) 708 return true; 709 710 if (!checkSyntax(formula)) 711 return false; 712 713 // Also, the first character of the formula should be an Uppercase 714 // letter. If not, logically, the formula is incorrect. 715 if (formula.at(0).category() != QChar::Letter_Uppercase) 716 return false; 717 718 if (reset) 719 { 720 // We first want to iterate in the atomCountList and make sure 721 // we remove all items from it. 722 723 while(!m_atomCountList.isEmpty()) 724 delete m_atomCountList.takeFirst(); 725 } 726 727 // And now finally start the real parsing stuff. 728 729 for (int iter = 0 ; iter < formula.length() ; ++iter) 730 { 731 curChar = formula.at(iter); 732 733 if(curChar.category() == QChar::Number_DecimalDigit) 734 { 735 // We are parsing a digit. 736 737 parsedCount.append(curChar); 738 739 wasDigit = true; 740 741 continue; 742 } 743 else if (curChar.category() == QChar::Letter_Lowercase) 744 { 745 // Current character is lowercase, which means we are inside 746 // of an atom symbol, such as Ca(the 'a') or Nob(either 747 // 'o' or 'b'). Thus, gotUpper should be true ! 748 749 if (!gotUpper) 750 return false; 751 752 // Make use of the parsed numerical character. 753 parsedSymbol.append(curChar); 754 755 // Let the people know that we have parsed a lowercase char 756 // and not a digit. 757 wasDigit = false; 758 } 759 else if (curChar.category() == QChar::Letter_Uppercase) 760 { 761 // Current character is uppercase, which means that we are 762 // at the beginning of an atom symbol. Check if there was a 763 // symbol being parsed before this one. 764 765 if (parsedSymbol.isEmpty()) 766 { 767 // Start new parsing round. 768 parsedSymbol.append(curChar); 769 770 gotUpper = true; 771 wasDigit = false; 772 continue; 773 } 774 775 // There was a symbol being parsed. Fill-in the atomCount 776 // object. 777 atomCount.setSymbol(parsedSymbol); 778 779 // Now we can prepare the field for the next one. 780 parsedSymbol.clear(); 781 parsedSymbol.append(curChar); 782 783 // Before going on, check if the symbol is correct. 784 if (atomCount.isSymbolKnown(refList) == -1) 785 return false; 786 787 // If there was a count being parsed, we have to take it 788 // into account. 789 if (wasDigit) 790 { 791 // And now we have to convert the string representation 792 // of the atom count for that atom to int. In fact, we 793 // have to be able to know that water H2O has TWO 794 // hydrogen atoms in it. 795 bool isok = true; 796 atomCount.setCount(parsedCount.toInt(&isok, 10)); 797 798 if(atomCount.count() == 0 && !isok) 799 // The atom counts for nothing ! Or was there 800 // an error in the conversion ? 801 return false; 802 803 // But we remember that we have to take into account the 804 // times parameter. 805 806 atomCount.setCount(atomCount.count() * times); 807 808 // Clear parsedCount for next count parsing round. 809 parsedCount.clear(); 810 } 811 else 812 atomCount.setCount(1 * times); 813 814 // We can now make sure that the atom gets represented 815 // in the formula.atomCountList list of 816 // AtomCount*. But for this we use a function that 817 // will make sure there is not already the same atom 818 // symbol in that List, so as not to duplicate the items 819 // accounting for a single atom symbol. 820 821 if (store) 822 { 823 accountInList(atomCount); 824 825 // qDebug() << __FILE__ << __LINE__ 826 // << "accountInList:" 827 // << atomCount.symbol() << atomCount.count(); 828 } 829 830 // Let the people know what we got: 831 832 wasDigit = false; 833 gotUpper = true; 834 } 835 // end(curChar.category() == QChar::Letter_Uppercase) 836 } 837 // end for (int iter = 0 ; iter < formula.length() ; ++iter) 838 839 // At this point we are at then end of the string, and we thus might 840 // still have something cooking: 841 842 // Thus we have to check that the last parsed atom 843 // symbol is correct. 844 845 atomCount.setSymbol(parsedSymbol); 846 847 if (atomCount.isSymbolKnown(refList) == -1) 848 return false; 849 850 // And now we have to convert the string representation 851 // of the atom count for that atom to int. In fact, we 852 // have to be able to know that water H2O has TWO 853 // hydrogen atoms in it. 854 855 // If there was a count being parsed, we have to take it 856 // into account. 857 if (wasDigit) 858 { 859 // And now we have to convert the string representation 860 // of the atom count for that atom to int. In fact, we 861 // have to be able to know that water H2O has TWO 862 // hydrogen atoms in it. 863 bool isok = true; 864 atomCount.setCount(parsedCount.toInt(&isok, 10)); 865 866 if(atomCount.count() == 0 && !isok) 867 // The atom counts for nothing ! Or was there 868 // an error in the conversion ? 869 return false; 870 871 // But we remember that we have to take into account the 872 // times parameter. 873 874 atomCount.setCount(atomCount.count() * times); 875 } 876 else 877 atomCount.setCount(1 * times); 878 879 // Finally, if asked by the caller, we can account for 880 // this atom symbol/count also ! 881 882 if (store) 883 { 884 accountInList(atomCount); 885 886 // qDebug() << __FILE__ << __LINE__ 887 // << "accountInList:" 888 // << atomCount.symbol() << atomCount.count(); 889 } 890 891 return true; 892 } 893 894 895 int accountInList(const AtomCount & atomCount,int times)896 Formula::accountInList(const AtomCount &atomCount, int times) 897 { 898 int count = atomCount.count() * times; 899 int newCount = 0; 900 bool found = false; 901 902 for(int iter = 0; iter < m_atomCountList.size(); ++iter) 903 { 904 if(m_atomCountList.at(iter)->symbol() == atomCount.symbol()) 905 { 906 // qDebug() << __FILE__ << __LINE__ 907 // << "accountInList:" 908 // << "same symbol:" << atomCount.symbol(); 909 910 found = true; 911 912 // Same atom found. If the result of accounting is that 913 // the count is 0, then remove the atomCount instance 914 // alltogether. For example, admitting that there was 1 915 // Carbon atom in the list, and we account -1 Carbon, the 916 // result is that there is no Carbon atom remaining in the 917 // list. The call below would return 0. In that case 918 // remove an atomCount item of which the count is 0. 919 920 newCount = m_atomCountList.at(iter)->account(count); 921 922 if(!newCount) 923 { 924 delete m_atomCountList.takeAt(iter); 925 926 // qDebug() << __FILE__ << __LINE__ 927 // << "accountInList:" 928 // "deleted atomCount instance " 929 // "because count reached 0"; 930 } 931 else 932 { 933 // qDebug() << __FILE__ << __LINE__ 934 // << "accountInList:" 935 // << "new count:" 936 // << m_atomCountList.at(iter)->count(); 937 } 938 939 // We have found an atomCount instance by the same symbol, 940 // there should not be twice the same symbol in a formula, 941 // thus we can break the loop. 942 break; 943 } 944 945 continue; 946 } 947 // End of 948 // for(int iter = 0; iter < m_atomCountList.size(); ++iter) 949 950 if(!found) 951 { 952 // AtomCount not found locally. Thus make a copy and append to 953 // *this m_atomCountList.. 954 955 AtomCount *newAtomCount = new AtomCount(atomCount); 956 newAtomCount->setCount(count); 957 958 m_atomCountList.append(newAtomCount); 959 960 newCount = count; 961 } 962 else 963 { 964 // One AtomCount was found locally, newCount contains the new 965 // count for that found AtomCount. 966 } 967 968 // Update what's the text of the formula to represent what is in 969 // atomCount list. 970 m_formula = elementalComposition(); 971 972 return newCount; 973 } 974 975 976 int accountInList(const QString & text,const QList<Atom * > & atomRefList,int times)977 Formula::accountInList(const QString &text, 978 const QList<Atom *> &atomRefList, 979 int times) 980 { 981 // We get a formula as an elemental composition and we want to 982 // account for that formula in *this formula. 983 984 // First off, validate the text. 985 986 Formula formula(text); 987 988 if(!formula.validate(atomRefList, true, true)) 989 return -1; 990 991 // Now, for each AtomCount search one in the current formula. 992 993 const QList<AtomCount *> &otherAtomCountList = formula.atomCountList(); 994 995 for(int iter = 0; iter < otherAtomCountList.size(); ++iter) 996 accountInList(*otherAtomCountList.at(iter), times); 997 998 // Update what's the text of the formula to represent what is in 999 // atomCount list. 1000 m_formula = elementalComposition(); 1001 1002 return 1; 1003 } 1004 1005 1006 1007 //! Checks the syntax of the \p formula. 1008 /*! The syntax of the \p formula is checked by verifying that the 1009 letters and ciphers in the formula are correctly placed. That is, we 1010 want that the ciphers appear after an atom symbol and not before 1011 it. We want that the atom symbol be made of one uppercase letter and 1012 that the following letters be lowercase. 1013 1014 \attention This is a syntax check and not a true validation, as the 1015 formula can contain symbols that are syntactically valid but 1016 corresponding to atom definitions not available on the system. 1017 1018 \param formula the formula. 1019 1020 \return true upon successful check, false otherwise. 1021 1022 \sa validate(). 1023 */ 1024 bool checkSyntax(const QString & formula)1025 Formula::checkSyntax(const QString &formula) 1026 { 1027 QChar curChar; 1028 1029 bool gotUpper = false; 1030 bool wasSign = false; 1031 1032 // Because the formula that we are analyzing might contain a title 1033 // and spaces , we first remove these. But make a local copy of 1034 // the member datum. 1035 1036 QString localFormula = formula; 1037 1038 // One formula can be like this: 1039 1040 // "Decomposed adenine" C5H4N5 +H 1041 1042 // The "Decomposed adenine" is the title 1043 // The C5H4N5 +H is the formula. 1044 1045 localFormula.remove(QRegExp("\".*\"")); 1046 1047 // We want to remove all the possibly-existing spaces. 1048 1049 localFormula.remove(QRegExp("\\s+")); 1050 1051 1052 for (int iter = 0 ; iter < localFormula.length() ; ++iter) 1053 { 1054 curChar = localFormula.at(iter); 1055 1056 if(curChar.category() == QChar::Number_DecimalDigit) 1057 { 1058 // We are parsing a digit. 1059 1060 // We may not have a digit after a +/- sign. 1061 if (wasSign) 1062 return false; 1063 1064 wasSign = false; 1065 1066 continue; 1067 } 1068 else if (curChar.category() == QChar::Letter_Lowercase) 1069 { 1070 // Current character is lowercase, which means we are inside 1071 // of an atom symbol, such as Ca(the 'a') or Nob(either 1072 // 'o' or 'b'). Thus, gotUpper should be true ! 1073 1074 if (!gotUpper) 1075 return false; 1076 1077 1078 // We may not have a lowercase character after a +/- sign. 1079 if (wasSign) 1080 return false; 1081 1082 // Let the people know that we have parsed a lowercase char 1083 // and not a digit. 1084 wasSign = false; 1085 } 1086 else if (curChar.category() == QChar::Letter_Uppercase) 1087 { 1088 // Current character is uppercase, which means that we are 1089 // at the beginning of an atom symbol. 1090 1091 // Let the people know what we got: 1092 1093 wasSign = false; 1094 gotUpper = true; 1095 } 1096 else 1097 { 1098 if (curChar != '+' && curChar != '-') 1099 return false; 1100 else 1101 { 1102 // We may not have 2 +/- signs in a raw. 1103 if(wasSign) 1104 return false; 1105 } 1106 1107 wasSign = true; 1108 gotUpper = false; 1109 } 1110 } 1111 // end for (int iter = 0 ; iter < localFormula.length() ; ++iter) 1112 1113 // At this point we found no error condition. 1114 return true; 1115 } 1116 1117 1118 //! Checks the syntax of the formula. 1119 /*! The syntax of the formula is checked by verifying that the letters 1120 and ciphers in the formula are correctly placed. That is, we want 1121 that the ciphers appear after an atom symbol and not before it. We 1122 want that the atom symbol be made of one uppercase letter and that 1123 the following letters be lowercase. 1124 1125 Note that the checking only concerns the formula, and not the 1126 minus-/plus- formulas. 1127 1128 \attention This is a syntax check and not a true validation, as the 1129 formula can contain symbols that are syntactically valid but 1130 corresponding to atom definitions not available on the system. 1131 1132 \return true upon successful check, false otherwise. 1133 1134 \sa validate(). 1135 */ 1136 bool checkSyntax()1137 Formula::checkSyntax() 1138 { 1139 // The default formula is always m_formula. 1140 1141 return checkSyntax(m_formula); 1142 } 1143 1144 1145 //! Validates the formula. 1146 /*! The validation of the formula involves: 1147 1148 \li Checking that the formula is not empty; 1149 1150 \li Splitting that formula into its plus-/minus- parts and parse 1151 the obtained plus-/minus- formulas. During parsing of the 1152 minus-/plus- formulas, each atom symbol encountered in the 1153 formulas is validated against the reference atom list; 1154 1155 \li Checking that at least the plus- or the minus- part contains 1156 something(same idea that the formula cannot be empty). 1157 1158 1159 \param refList List of reference atoms. 1160 1161 \param store Indicates if AtomCount objects created during the 1162 parsing of the sub-formulas generated by the split of the formula 1163 have to be stored, or not. Defaults to false. 1164 1165 \param reset Indicates if the list of AtomCount objects has to be 1166 reset before the splitParts work. This parameter may be useful in 1167 case the caller needs to "accumulate" the accounting of the 1168 formula. Defaults to false. 1169 1170 1171 \return true if the validation succeeded, false otherwise. 1172 */ 1173 bool validate(const QList<Atom * > & refList,bool store,bool reset)1174 Formula::validate(const QList<Atom *> &refList, 1175 bool store, bool reset) 1176 { 1177 if (!m_formula.size()) 1178 return false; 1179 1180 int result = splitParts(refList, 1, store, reset); 1181 1182 if (result == MXT_FORMULA_SPLIT_FAIL) 1183 return false; 1184 1185 // The sum of m_plusFormula and m_minusFormula cannot be empty. 1186 if (m_plusFormula.size() && ! m_plusFormula.size()) 1187 return false; 1188 1189 return true; 1190 } 1191 1192 1193 //! Accounts \p this formula's mono/avg masses. 1194 /*! The masses are calculated first and then the \p mono and \p avg 1195 parameters are updated using the calculated values. The accounting 1196 can be compounded \p times times. 1197 1198 \param refList List of atoms to be used as reference. 1199 1200 \param mono Pointer to the monoisotopic mass to be updated. Defaults 1201 to 0, in which case the value is not updated. 1202 1203 \param avg Pointer to the average mass to be updated. Defaults to 0, 1204 in which case the value is not updated. 1205 1206 \param times Times that the masses should be compounded prior to be 1207 accounted for. 1208 1209 \return true upon success, false otherwise. 1210 */ 1211 bool accountMasses(const QList<Atom * > & refList,double * mono,double * avg,int times)1212 Formula::accountMasses(const QList<Atom *> &refList, 1213 double *mono, double *avg, int times) 1214 { 1215 // Note the 'times' param below. 1216 if (splitParts(refList, times, 1217 true /* store */, 1218 true /* reset */) == MXT_FORMULA_SPLIT_FAIL) 1219 return false; 1220 1221 // qDebug() << __FILE__ << __LINE__ 1222 // << "accountMasses:" 1223 // << "after splitParts:" 1224 // << "store: true ; reset: true" 1225 // << "m_formula:" << m_formula 1226 // << "text" << Formula::text(); 1227 1228 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1229 { 1230 AtomCount *atomCount = 0; 1231 1232 atomCount = m_atomCountList.at(iter); 1233 1234 // note the '1' times below because we already accounted 1235 // for the 'times' parameter in the splitParts() call. 1236 if(!atomCount->accountMasses(refList, mono, avg, 1)) 1237 return false; 1238 } 1239 1240 return true; 1241 } 1242 1243 1244 //! Accounts \p this formula's mono/avg masses. 1245 /*! The masses are calculated first and then the \p ponderable is 1246 updated using the calculated values. The accounting can be 1247 compounded \p times times. 1248 1249 \param refList List of atoms to be used as reference. 1250 1251 \param ponderable Pointer to the ponderable to be updated. Cannot be 0. 1252 1253 \param times Times that the masses should be compounded prior to be 1254 accounted for. 1255 1256 \return true upon success, false otherwise. 1257 */ 1258 bool accountMasses(const QList<Atom * > & refList,Ponderable * ponderable,int times)1259 Formula::accountMasses(const QList<Atom *> &refList, 1260 Ponderable *ponderable, int times) 1261 { 1262 Q_ASSERT(ponderable); 1263 1264 // Note the 'times' param below. 1265 if (splitParts(refList, times, true, true) == MXT_FORMULA_SPLIT_FAIL) 1266 return false; 1267 1268 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1269 { 1270 AtomCount *atomCount = m_atomCountList.at(iter); 1271 1272 // note the '1' times below because we already accounted 1273 // for the 'times' parameter in the splitParts() call. 1274 if(!atomCount->accountMasses(refList, 1275 &ponderable->rmono(), 1276 &ponderable->ravg(), 1)) 1277 return false; 1278 } 1279 1280 return true; 1281 } 1282 1283 1284 //! Accounts the atoms in the formula \p times times. 1285 /*! Calls splitParts(\p refList, \p times, true, true). 1286 1287 \param refList List of atoms to be used as reference. 1288 1289 \param times Times that the atom counts should be compounded prior to 1290 be accounted for. 1291 1292 \return true upon success, false otherwise. 1293 1294 \sa splitParts(). 1295 */ 1296 bool accountAtoms(const QList<Atom * > & refList,int times)1297 Formula::accountAtoms(const QList<Atom *> &refList, int times) 1298 { 1299 // Note the 'times' param below. 1300 if (splitParts(refList, times, true, false) == MXT_FORMULA_SPLIT_FAIL) 1301 return false; 1302 1303 return true; 1304 } 1305 1306 1307 //! Computes a formula string. 1308 /*! Computes a formula string by iterating in the list of atom count 1309 objects. 1310 1311 \return A string containing the formula. 1312 */ 1313 QString elementalComposition() const1314 Formula::elementalComposition() const 1315 { 1316 // We have a list of AtomCount objects which might have either a 1317 // positive or a negative count. 1318 1319 // We want to provide a formula that lists the positive component 1320 // first and the negative component last. 1321 1322 // Each positive/negative component will list the atoms in the 1323 // conventional order : CxxHxxNxxOxx and all the rest in 1324 // alphabetical order. 1325 1326 // For each atomCount in the m_atomCountList list, prepare a 1327 // string like "C12" and "H13", for example, in two distinct 1328 // QStringList's, one for positive count values and one for 1329 // negative count values. 1330 1331 QStringList negativeStringList; 1332 QStringList positiveStringList; 1333 1334 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1335 { 1336 AtomCount *atomCount = m_atomCountList.at(iter); 1337 1338 if(atomCount->count() < 0) 1339 { 1340 negativeStringList.append(QString("%1%2") 1341 .arg(atomCount->symbol()) 1342 .arg(-1 * atomCount->count())); 1343 1344 // qDebug() << __FILE__ << __LINE__ 1345 // << "negative atomCount:" 1346 // << atomCount->symbol() 1347 // << "/" 1348 // << (-1 * atomCount->count()); 1349 } 1350 else 1351 { 1352 positiveStringList.append(QString("%1%2") 1353 .arg(atomCount->symbol()) 1354 .arg(atomCount->count())); 1355 1356 // qDebug() << __FILE__ << __LINE__ 1357 // << "positive atomCount:" 1358 // << atomCount->symbol() 1359 // << "/" 1360 // << atomCount->count(); 1361 } 1362 } 1363 1364 // Sort the lists. 1365 1366 negativeStringList.sort(); 1367 positiveStringList.sort(); 1368 1369 // We want to provide for each positive and negative components of 1370 // the initial formula object, an elemental formula that complies 1371 // with the convention : first the C atom, next the H, N, O atoms 1372 // and all the subsequent ones in alphabetical order. 1373 1374 // Thus we look for the four C, H, N, O atoms, and we create the 1375 // initial part of the elemental formula. Each time we find one 1376 // such atom we remove it from the list, so that we can later just 1377 // append all the remaining atoms, since we have sorted the lists 1378 // above. 1379 1380 // The positive component 1381 // ====================== 1382 1383 int atomIndex = 0; 1384 QString positiveComponentString; 1385 1386 // Carbon 1387 atomIndex = positiveStringList.indexOf(QRegExp("C\\d+")); 1388 if(atomIndex != -1) 1389 { 1390 positiveComponentString += positiveStringList.at(atomIndex); 1391 positiveStringList.removeAt(atomIndex); 1392 } 1393 1394 // Hydrogen 1395 atomIndex = positiveStringList.indexOf(QRegExp("H\\d+")); 1396 if(atomIndex != -1) 1397 { 1398 positiveComponentString += positiveStringList.at(atomIndex); 1399 positiveStringList.removeAt(atomIndex); 1400 } 1401 1402 // Nitrogen 1403 atomIndex = positiveStringList.indexOf(QRegExp("N\\d+")); 1404 if(atomIndex != -1) 1405 { 1406 positiveComponentString += positiveStringList.at(atomIndex); 1407 positiveStringList.removeAt(atomIndex); 1408 } 1409 1410 // Oxygen 1411 atomIndex = positiveStringList.indexOf(QRegExp("O\\d+")); 1412 if(atomIndex != -1) 1413 { 1414 positiveComponentString += positiveStringList.at(atomIndex); 1415 positiveStringList.removeAt(atomIndex); 1416 } 1417 1418 // All the other items are already sorted and can be appended. 1419 1420 positiveComponentString += positiveStringList.join(""); 1421 1422 // qDebug() << __FILE__ << __LINE__ 1423 // <<"positiveComponentString:" << positiveComponentString; 1424 1425 1426 // The negative component 1427 // ====================== 1428 1429 atomIndex = 0; 1430 QString negativeComponentString; 1431 1432 // Carbon 1433 atomIndex = negativeStringList.indexOf(QRegExp("C\\d+")); 1434 if(atomIndex != -1) 1435 { 1436 negativeComponentString += negativeStringList.at(atomIndex); 1437 negativeStringList.removeAt(atomIndex); 1438 } 1439 1440 // Hydrogen 1441 atomIndex = negativeStringList.indexOf(QRegExp("H\\d+")); 1442 if(atomIndex != -1) 1443 { 1444 negativeComponentString += negativeStringList.at(atomIndex); 1445 negativeStringList.removeAt(atomIndex); 1446 } 1447 1448 // Nitrogen 1449 atomIndex = negativeStringList.indexOf(QRegExp("N\\d+")); 1450 if(atomIndex != -1) 1451 { 1452 negativeComponentString += negativeStringList.at(atomIndex); 1453 negativeStringList.removeAt(atomIndex); 1454 } 1455 1456 // Oxygen 1457 atomIndex = negativeStringList.indexOf(QRegExp("O\\d+")); 1458 if(atomIndex != -1) 1459 { 1460 negativeComponentString += negativeStringList.at(atomIndex); 1461 negativeStringList.removeAt(atomIndex); 1462 } 1463 1464 // All the other items are already sorted and can be appended. 1465 1466 negativeComponentString += negativeStringList.join(""); 1467 1468 // qDebug() << __FILE__ << __LINE__ 1469 // <<"negativeComponentString:" << negativeComponentString; 1470 1471 // Create the final elemental formula that comprises both the 1472 // positive and negative element. First the positive element and 1473 // then the negative one. Only append the negative one, prepended 1474 // with '-' if the string is non-empty. 1475 1476 QString elementalComposition = positiveComponentString; 1477 1478 if (!negativeComponentString.isEmpty()) 1479 elementalComposition += QString("-%1") 1480 .arg(negativeComponentString); 1481 1482 // qDebug() << __FILE__ << __LINE__ 1483 // <<"elementalComposition:" << elementalComposition; 1484 1485 return elementalComposition; 1486 } 1487 1488 1489 //! Computes the total number of atoms. 1490 /*! 1491 1492 \return The number of atoms. 1493 */ 1494 int totalAtoms() const1495 Formula::totalAtoms() const 1496 { 1497 int totalAtomCount = 0; 1498 1499 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1500 { 1501 AtomCount *atomCount = m_atomCountList.at(iter); 1502 1503 totalAtomCount += atomCount->count(); 1504 } 1505 1506 return totalAtomCount; 1507 } 1508 1509 1510 //! Computes the total number of isotopes. 1511 /*! 1512 1513 \param refList List of atoms to be used as reference. 1514 1515 \return The number of isotopes. 1516 */ 1517 int totalIsotopes(const QList<Atom * > & refList) const1518 Formula::totalIsotopes(const QList<Atom *> &refList) const 1519 { 1520 int totalIsotopeCount = 0; 1521 1522 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1523 { 1524 AtomCount *atomCount = m_atomCountList.at(iter); 1525 1526 Atom listAtom; 1527 1528 if(Atom::isSymbolInList(atomCount->symbol(), 1529 refList, &listAtom) == -1) 1530 return -1; 1531 1532 // The number of isotopes for current atomCount is the number of 1533 // atoms compounded per the number of isotopes in the isotope 1534 // list. 1535 totalIsotopeCount += 1536 (listAtom.isotopeList().size() * atomCount->count()); 1537 } 1538 1539 return totalIsotopeCount; 1540 } 1541 1542 1543 //! Computes the number of entities(atoms and isotopes). 1544 /*! 1545 1546 \param refList List of atoms to be used as reference. 1547 1548 \param totalAtoms Pointer to a integer in which to store the number of 1549 atoms. Defaults to 0, in which case the value is not updated. 1550 1551 \param totalIsotopes Pointer to a integer in which to store the number 1552 of isotopes. Defaults to 0, in which case the value is not updated. 1553 1554 \return true upon a successfull computation, false otherwise. 1555 */ 1556 bool totalEntities(const QList<Atom * > & refList,int * totalAtoms,int * totalIsotopes) const1557 Formula::totalEntities(const QList<Atom *> &refList, 1558 int *totalAtoms, int *totalIsotopes) const 1559 { 1560 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1561 { 1562 AtomCount *atomCount = m_atomCountList.at(iter); 1563 1564 if(totalAtoms) 1565 *totalAtoms += atomCount->count(); 1566 1567 if(totalIsotopes) 1568 { 1569 Atom listAtom; 1570 1571 if (Atom::isSymbolInList(atomCount->symbol(), 1572 refList, &listAtom) == -1) 1573 return false; 1574 1575 // The number of isotopes for current atomCount is the number of 1576 // atoms compounded per the number of isotopes in the isotope 1577 // list. 1578 *totalIsotopes += 1579 (listAtom.isotopeList().size() * atomCount->count()); 1580 } 1581 } 1582 1583 return true; 1584 } 1585 1586 1587 //! Performs a deep copy of the atom count objects. 1588 /*! Each atom count object in the list of such objects is updated 1589 deeply with the values obtained from the corresponding atom in the 1590 \p refList list of reference atoms. This ensures that each atom 1591 count object in the formula has a deep knowledge of its isotopic 1592 composition. Such kind of process is used when isotopic pattern 1593 calculations are to be performed for a given formula. 1594 1595 \param refList List of reference atoms. 1596 1597 \return true upon success, false otherwise. 1598 */ 1599 bool deepAtomCopy(const QList<Atom * > & refList)1600 Formula::deepAtomCopy(const QList<Atom *> &refList) 1601 { 1602 // When the formula is parsed, the atomCount objects(derived form 1603 // Atom) are created by only shallow-copying(only the atom 1604 // symbol is actually copied to identify the atom). 1605 1606 // Here, we are asked that the Atom component of the AtomCount 1607 // objects in the list of such instances be deep-copied from the 1608 // corresponding Atom found in the reference atom list 1609 // 'refList'. This way, the updated objects have their actual list 1610 // of isotopes(this is useful for the isotopic pattern calculation, 1611 // for example). 1612 1613 for (int iter = 0; iter < m_atomCountList.size(); ++iter) 1614 { 1615 AtomCount *atomCount = m_atomCountList.at(iter); 1616 1617 if(Atom::isSymbolInList(atomCount->symbol(), 1618 refList, atomCount) == -1) 1619 return false; 1620 } 1621 1622 return true; 1623 } 1624 1625 1626 1627 //! Parses a formula XML element and sets the data to the formula. 1628 /*! Parses the formula XML element passed as argument and sets the 1629 data of that element to \p this formula instance(this is called XML 1630 rendering). The syntax of the parsed formula is checked and the 1631 result of that check is returned. 1632 1633 \param element XML element to be parsed and rendered. 1634 1635 \return true if parsing and syntax checking were successful, false 1636 otherwise. 1637 */ 1638 bool renderXmlFormulaElement(const QDomElement & element)1639 Formula::renderXmlFormulaElement(const QDomElement &element) 1640 { 1641 if (element.tagName() != "formula") 1642 return false; 1643 1644 m_formula = element.text(); 1645 1646 // Do not forget that we might have a title associated with the 1647 // formula and spaces. checkSyntax() should care of removing these 1648 // title and spaces before checking for chemical syntax 1649 // correctness. 1650 1651 return checkSyntax(); 1652 } 1653 1654 } // namespace massXpert 1655