1 /* massXpert - the true massist's program.
2    --------------------------------------
3    Copyright(C) 2006,2007 Filippo Rusconi
4 
5    http://www.massxpert.org/massXpert
6 
7    This file is part of the massXpert project.
8 
9    The massxpert project is the successor to the "GNU polyxmass"
10    project that is an official GNU project package(see
11    www.gnu.org). The massXpert project is not endorsed by the GNU
12    project, although it is released ---in its entirety--- under the
13    GNU General Public License. A huge part of the code in massXpert
14    is actually a C++ rewrite of code in GNU polyxmass. As such
15    massXpert was started at the Centre National de la Recherche
16    Scientifique(FRANCE), that granted me the formal authorization to
17    publish it under this Free Software License.
18 
19    This software is free software; you can redistribute it and/or
20    modify it under the terms of the GNU  General Public
21    License version 3, as published by the Free Software Foundation.
22 
23 
24    This software is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    General Public License for more details.
28 
29    You should have received a copy of the GNU General Public License
30    along with this software; if not, write to the
31 
32    Free Software Foundation, Inc.,
33 
34    51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
35 */
36 
37 
38 /////////////////////// Local includes
39 #include "cleaver.hpp"
40 
41 
42 namespace massXpert
43 {
44 
Cleaver(Polymer * polymer,const PolChemDef * polChemDef,const CleaveOptions & cleaveOptions,const CalcOptions & calcOptions,const IonizeRule & ionizeRule)45   Cleaver::Cleaver(Polymer *polymer,
46                    const PolChemDef *polChemDef,
47                    const CleaveOptions &cleaveOptions,
48                    const CalcOptions &calcOptions,
49                    const IonizeRule &ionizeRule)
50     : mp_polymer(polymer),
51       mp_polChemDef(polChemDef),
52       m_cleaveOptions(cleaveOptions),
53       m_calcOptions(calcOptions),
54       m_ionizeRule(ionizeRule)
55   {
56     Q_ASSERT(mp_polymer && mp_polChemDef);
57     mp_oligomerList = 0;
58   }
59 
60 
Cleaver(const Cleaver & other)61   Cleaver::Cleaver(const Cleaver &other)
62     : mp_polymer(other.mp_polymer),
63       mp_polChemDef(other.mp_polChemDef),
64       m_cleaveOptions(other.m_cleaveOptions),
65       m_calcOptions(other.m_calcOptions),
66       m_ionizeRule(other.m_ionizeRule)
67   {
68     Q_ASSERT(mp_polymer && mp_polChemDef);
69     mp_oligomerList = 0;
70   }
71 
72 
~Cleaver()73   Cleaver::~Cleaver()
74   {
75     // We are not owner of the oligomer list, do not free it!
76   }
77 
78 
79   void
setOligomerList(OligomerList * oligomerList)80   Cleaver::setOligomerList(OligomerList *oligomerList)
81   {
82     Q_ASSERT(oligomerList);
83 
84     mp_oligomerList = oligomerList;
85   }
86 
87 
88   OligomerList *
oligomerList()89   Cleaver::oligomerList()
90   {
91     return mp_oligomerList;
92   }
93 
94 
95   bool
cleave(bool reset)96   Cleaver::cleave(bool reset)
97   {
98     if (!mp_oligomerList)
99       qFatal("%s@%d -- The oligomer list is 0.", __FILE__, __LINE__);
100 
101     // If the polymer sequence is empty, just return.
102     if (!mp_polymer->size())
103       return true;
104 
105     // Ensure that the cleavage pattern was already parsed.
106 
107     if (!m_cleaveOptions.motifList()->size())
108       {
109         if (!m_cleaveOptions.parse())
110           {
111             qDebug() << __FILE__ << __LINE__
112                      << "Failed to parse the cleavage options";
113 
114             return false;
115           }
116       }
117 
118     //   qDebug() << __FILE__ << __LINE__
119     // 	    << "number of motifs:"
120     // 	    << mp_cleaveOptions->motifList()->size();
121 
122     if (!fillIndexLists())
123       {
124         qDebug() << __FILE__ << __LINE__
125                  << "Index lists(cleave/nocleave) are empty."
126           "No oligomer generated.";
127 
128         // We can return true, as no error condition was found but not
129         // oligomers were generated.
130 
131         return true;
132       }
133 
134     if (resolveCleavageNoCleavage() == -1)
135       {
136         qDebug() << __FILE__ << __LINE__
137                  << "Failed to resolve cleavage/nocleavage";
138 
139         return false;
140       }
141 
142     removeDuplicatesCleavage();
143 
144     qSort(m_cleaveIndexList.begin(), m_cleaveIndexList.end());
145 
146     //     for (int debugIter = 0; debugIter < m_cleaveIndexList.size();
147     // 	 ++debugIter)
148     //       {
149     // 	qDebug() << __FILE__ << __LINE__
150     // 		 << "Index:" << m_cleaveIndexList.at(debugIter);
151     //       }
152 
153     if (reset)
154       emptyOligomerList();
155 
156     for (int iter = 0 ; iter <= m_cleaveOptions.partials(); ++iter)
157       {
158         if (cleavePartial(iter) == -1)
159           {
160             qDebug() << __FILE__ << __LINE__
161                      << "Failed to perform partial cleavage at index:"
162                      << iter;
163 
164             return false;
165           }
166       }
167 
168     // At this point we have the list of lists of oligomers, one list of
169     // oligomers for each partial cleavage.
170 
171     while (m_cleaveIndexList.size())
172       m_cleaveIndexList.removeFirst();
173 
174     while (m_noCleaveIndexList.size())
175       m_noCleaveIndexList.removeFirst();
176 
177     return true;
178   }
179 
180 
181   int
fillIndexLists()182   Cleaver::fillIndexLists()
183   {
184     QList<CleaveMotif *> *motifList = m_cleaveOptions.motifList();
185 
186     while (m_cleaveIndexList.size())
187       m_cleaveIndexList.removeFirst();
188 
189     while (m_noCleaveIndexList.size())
190       m_noCleaveIndexList.removeFirst();
191 
192     // Since version 2.3.0, the cleavage might be performed on a
193     // selected portion of a sequence only, not necessarily on the
194     // whole polymer sequence.
195 
196     CoordinateList coordinateList = m_calcOptions.coordinateList();
197     Coordinates *coordinates = coordinateList.first();
198 
199     int startIndex = coordinates->start();
200     int endIndex = coordinates->end();
201 
202     for (int iter = 0; iter < motifList->size(); ++iter)
203       {
204         CleaveMotif *cleaveMotif = motifList->at(iter);
205         Q_ASSERT(cleaveMotif);
206 
207         int index = startIndex -1;
208 
209         while (1)
210           {
211             index = findCleaveMotif(*cleaveMotif, index + 1, endIndex);
212 
213             if (index == -1)
214               break;
215 
216             // Do not forget: The position at which the motif is found
217             // in the polymer sequence is not necessarily the position
218             // at which the cleavage will effectively occur. Indeed,
219             // let's say that we found such motif in the polymer
220             // sequence: "KKRKGP". This motif was extracted from a
221             // cleave spec that had a pattern like this: "KKRK/GP". What
222             // we see here is that the cleavage occurs after the fourth
223             // monomer! And we must realize that the 'index' returned
224             // above corresponds to the index of the first 'K' in
225             // "KKRKGP" motif that was found in the polymer
226             // sequence. Thus we have to take into account the offset
227             //(+4, in our example, WHICH IS A POSITION and not an
228             // index, which is why we need to remove 1 below) of the
229             // cleavage:
230 
231             int cleavageIndex = index + cleaveMotif->offset() - 1;
232 
233             if (cleavageIndex < 0)
234               continue;
235 
236             if (cleavageIndex >= endIndex)
237               break;
238 
239             // qDebug() << __FILE__ << __LINE__
240             //          << "Found new cleavage index:"
241             //          << cleavageIndex;
242 
243             if (cleaveMotif->isForCleave())
244               {
245                 m_cleaveIndexList.append(cleavageIndex);
246 
247                 // qDebug() << __FILE__ << __LINE__
248                 //          << "For cleavage, index:" << cleavageIndex;
249               }
250             else
251               {
252                 m_noCleaveIndexList.append(cleavageIndex);
253 
254                 // qDebug() << __FILE__ << __LINE__
255                 //          << "Not for cleavage";
256               }
257 
258           }
259         // End of
260         // while (1)
261       }
262     // End of
263     // for (int iter = 0; iter < motifList->size(); ++iter)
264 
265     // Note that returning 0 is not an error condition, because a
266     // sequence where no site is found whatsoever will result in 0.
267     return m_cleaveIndexList.size() +  m_noCleaveIndexList.size();
268   }
269 
270 
271   int
resolveCleavageNoCleavage()272   Cleaver::resolveCleavageNoCleavage()
273   {
274     for (int iter = 0; iter < m_noCleaveIndexList.size(); ++iter)
275       {
276         int noCleaveIndex = m_noCleaveIndexList.at(iter);
277 
278         for(int jter = 0; jter < m_cleaveIndexList.size(); ++jter)
279           {
280             int cleaveIndex = m_cleaveIndexList.at(jter);
281 
282             if (noCleaveIndex == cleaveIndex)
283               m_cleaveIndexList.removeAt(jter);
284           }
285       }
286 
287     return m_cleaveIndexList.size();
288   }
289 
290 
291   int
removeDuplicatesCleavage()292   Cleaver::removeDuplicatesCleavage()
293   {
294     for (int iter = 0; iter < m_cleaveIndexList.size(); ++iter)
295       {
296         int index = m_cleaveIndexList.at(iter);
297 
298         int foundItemIndex = m_cleaveIndexList.indexOf(index, iter + 1);
299 
300         if (foundItemIndex != -1)
301           {
302             m_cleaveIndexList.removeAt(foundItemIndex);
303             --iter;
304           }
305       }
306 
307     return m_cleaveIndexList.size();
308   }
309 
310 
311   int
findCleaveMotif(CleaveMotif & cleaveMotif,int startIndex,int endIndex)312   Cleaver::findCleaveMotif(CleaveMotif &cleaveMotif,
313                            int startIndex, int endIndex)
314   {
315     bool noGood = false;
316 
317     int firstIndex = 0;
318 
319     QList<const Monomer *> monomerList = mp_polymer->monomerList();
320     const QStringList &codeList = cleaveMotif.codeList();
321 
322 
323     //We have to iterate in the polymer sequence starting at 'index', in
324     //search for a sequence element identical to the sequence in the
325     //'cleaveMotif'.
326 
327     // This means that if
328 
329     // cleavemotif->m_motifList [0] = "Lys"
330 
331     // cleavemotif->m_motifList [1] = "Pro"
332 
333     // the, we want to search in the polymer list of monomers the same
334     // sequence by iterating in this list from index 'index' onwards,
335     // and we stop searching when the list's end is found or if
336 
337     // list [n] = "Lys" and
338 
339     // list [n+1] = "Pro".
340 
341 
342     if (mp_polymer->size() == 0)
343       return 0;
344 
345     if (codeList.size() == 0)
346       return -1;
347 
348     if (startIndex < 0)
349       return -1;
350 
351     if (endIndex >= mp_polymer->size())
352       return -1;
353 
354     // Seed the routine by setting 'first' to the first motif in the
355     // codeList (in our example this is "Lys").
356 
357     QString firstCode = codeList.first();
358 
359     // And now iterate (starting from 'index') in the polymer
360     // sequence's list of monomers in search for a monomer having the
361     // proper code ("Lys").
362 
363     int iterIndex = startIndex;
364 
365     while (iterIndex < endIndex)
366       {
367         const Monomer *monomer = monomerList.at(iterIndex);
368 
369         if (monomer->code() != firstCode)
370           {
371             // The currently iterated code is not the one we
372             // search. So go one code further in the sequence.
373 
374             ++iterIndex;
375             continue;
376           }
377 
378         // If we are here, then that means that we actually found on
379         // monomer code in the sequence that matches the one we are
380         // looking for.
381 
382         firstIndex = iterIndex;
383         noGood = false;
384 
385         // Now that we have anchored our search at firstIndex in the
386         // polymer sequence, continue with next monomer and check if
387         // it matches the next monomer in the motif we are looking
388         // for.
389 
390         for (int iter = 1; iter < codeList.size(); ++iter)
391           {
392             if (iterIndex + iter >= endIndex)
393               {
394                 noGood = true;
395                 break;
396               }
397 
398             QString nextCode = codeList.at(iter);
399 
400             monomer = monomerList.at(iterIndex + iter);
401 
402             if (monomer->code() == nextCode)
403               continue;
404             else
405               {
406                 noGood = true;
407                 break;
408               }
409           }
410         // End of
411         // for (int iter = 1; iter < codeList.size(); ++iter)
412 
413         if (noGood)
414           {
415             ++iterIndex;
416             continue;
417           }
418         else
419           {
420             return firstIndex;
421           }
422       }
423     // End of
424     // while (iterIndex < monomerList.size())
425 
426     return -1;
427   }
428 
429 
430   bool
accountCleaveRule(CleaveRule * cleaveRule,CleaveOligomer * oligomer)431   Cleaver::accountCleaveRule(CleaveRule *cleaveRule,
432                              CleaveOligomer *oligomer)
433   {
434     Q_ASSERT(cleaveRule);
435     Q_ASSERT(oligomer);
436 
437     const QList<Atom *> &refList =
438       oligomer->polymer()->polChemDef()->atomList();
439 
440     // For each Coordinates element in the oligomer, we have to ensure
441     // we apply the formula(s) that is/are required.
442 
443     int coordinatesCount = static_cast<CoordinateList *>(oligomer)->size();
444 
445     //     qDebug() << __FILE__ << __LINE__
446     // 	      << "Coordinates count:" << coordinatesCount;
447 
448     for (int iter = 0; iter < coordinatesCount; ++iter)
449       {
450         Coordinates *coordinates =
451           static_cast<CoordinateList *>(oligomer)->at(iter);
452 
453         if (!cleaveRule->leftCode().isEmpty())
454           {
455             // The formula has to be there.
456 
457             Formula ruleFormula = cleaveRule->leftFormula();
458             Q_ASSERT(!ruleFormula.text().isEmpty());
459 
460             // What is the monomer at the left end of current oligomer ?
461             const Monomer &monomer = oligomer->atLeftEnd();
462 
463             if (monomer.code() == cleaveRule->leftCode())
464               {
465                 // 		qDebug() << __FILE__ << __LINE__
466                 // 			  << "Matched left code:" << cleaveRule->leftCode();
467 
468                 // But, this is not going to be real true, if the
469                 // monomer is acutally the left-end monomer of the
470                 // polymer sequence, because then that would mean that
471                 // there was no cleavage on this monomer, thus no rule
472                 // to apply.
473 
474                 if (!coordinates->start())
475                   {
476                     // The monomer is not the left-end monomer, so the
477                     // match is real. Account for the formula !
478 
479                     if (!ruleFormula.accountMasses(refList, oligomer))
480                       return false;
481                   }
482               }
483           }
484 
485         if (!cleaveRule->rightCode().isEmpty())
486           {
487             // The formula has to be there.
488 
489             Formula ruleFormula = cleaveRule->rightFormula();
490             Q_ASSERT(!ruleFormula.text().isEmpty());
491 
492             // What is the monomer at the right end of current oligomer ?
493             const Monomer &monomer = oligomer->atRightEnd();
494 
495             if (monomer.code() == cleaveRule->rightCode())
496               {
497                 // 		qDebug() << __FILE__ << __LINE__
498                 // 			  << "Matched right code:" << cleaveRule->rightCode();
499 
500                 // But, this is not going to be real true, if the
501                 // monomer is acutally the right-end monomer of the
502                 // polymer sequence, because then that would mean that
503                 // there was no cleavage on this monomer, thus no rule
504                 // to apply.
505 
506                 if (coordinates->end() != mp_polymer->size() - 1)
507                   {
508                     // The monomer is not the right-end monomer, so
509                     // the match is real. Account for the formula !
510 
511                     if (!ruleFormula.accountMasses(refList, oligomer))
512                       return false;
513                   }
514               }
515           }
516       }
517 
518     return true;
519   }
520 
521 
522   int
cleavePartial(int partialCleavageValue)523   Cleaver::cleavePartial(int partialCleavageValue)
524   {
525     bool oligomerIsPolymer = false;
526 
527     int iter = 0;
528 
529     static int leftIndex = 0;
530     static int rightIndex = 0;
531 
532     Q_ASSERT(partialCleavageValue >= 0);
533 
534     OligomerList partialOligomerList;
535 
536     // Since version 2.3.0, the cleavage might be performed on a
537     // selected portion of a sequence only, not necessarily on the
538     // whole polymer sequence. We have to know these values because
539     // when we create oligomer we'll have to set the coordinates of
540     // the monomers correctly.
541 
542     CoordinateList coordinateList = m_calcOptions.coordinateList();
543     Coordinates *coordinates = coordinateList.first();
544 
545     int startIndex = coordinates->start();
546     int endIndex = coordinates->end();
547 
548     leftIndex = startIndex;
549     rightIndex = 0;
550 
551     // Iterate in the array of indices where the cleavages should occur.
552 
553     for (iter = 0; iter < m_cleaveIndexList.size(); ++iter)
554       {
555         // Make sure, if the partial cleavage is very large, for
556         // example, that it will not lead us to access the polymer
557         // sequence at a position larger than its upper boundary.
558 
559         // Imagine cutting a polymer with only one Met residue with
560         // cyanogen bromide: m_cleaveIndexList will contain a single
561         // element: the index at which the methionine occurs in the
562         // polymer sequence(and there is a single one). Now, Imagine
563         // that we are asked to perform a cleavage with
564         // 'partialCleavageValue' of 2. The way we do it is that we fetch the
565         // index in the list of cleavage indices(m_cleaveIndexList) two
566         // positions farther than the position we are iterating:
567 
568         // int partCleave = iter + partialCleavageValue;
569 
570         // Now, if m_cleaveIndexList contains a single element, asking
571         // for this m_cleaveIndexList.at(iter + partialCleavageValue) will
572         // go out of the boundaries of the list, since it has a single
573         // item and partialCleavageValue is 2. This is what we are willing to
574         // avoid.
575 
576         int partCleave = iter + partialCleavageValue;
577 
578         if (partCleave >= m_cleaveIndexList.size())
579           {
580             if (iter == 0)
581               oligomerIsPolymer = true;
582 
583             break;
584           }
585 
586         rightIndex = m_cleaveIndexList.at(partCleave);
587 
588         QString name = QString("%1#%2")
589           .arg(partialCleavageValue)
590           .arg(iter + 1);
591 
592         CleaveOligomer *oligomer =
593           new CleaveOligomer(mp_polymer,
594                              name, m_cleaveOptions.name(),
595                              Ponderable(),
596                              leftIndex, rightIndex,
597                              partialCleavageValue, m_calcOptions);
598 
599         partialOligomerList.append(oligomer);
600 
601         leftIndex = m_cleaveIndexList.at(iter) + 1;
602       }
603     // End of
604     // for (int iter = 0; iter < m_cleaveIndexList.size(); iter=+)
605 
606     // Do not forget the right-end oligomer ! And be sure to determine
607     // what's its real left end index !
608 
609     if (oligomerIsPolymer)
610       leftIndex = startIndex;
611     else
612       leftIndex = m_cleaveIndexList.at(--iter) + 1;
613 
614     // 'iter' is used to construct the name of the oligomer, so we have
615     // to increment it once because we did not have the opportunity to
616     // increment it between the last but one oligomer and this one.
617 
618     ++iter;
619 
620     // Remove 1 because the oligomer is described using indices and not
621     // positions.
622 
623     rightIndex = endIndex;
624 
625     QString name = QString("%1#%2")
626       .arg(partialCleavageValue)
627       .arg(iter + 1);
628 
629     CleaveOligomer *oligomer =
630       new CleaveOligomer(mp_polymer,
631                          name, m_cleaveOptions.name(),
632                          Ponderable(),
633                          leftIndex, rightIndex,
634                          partialCleavageValue, m_calcOptions);
635 
636     partialOligomerList.append(oligomer);
637 
638     // At this point all the skeleton oligomers have been computed for
639     // the given partialCleavageValue. We still have to perform the
640     // cross-link analysis prior to both calculate the masses and
641     // perform the ionization of all the generated oligomers. Note that
642     // making cross-link analysis is only useful in case the cleavage is
643     // full(partialCleavageValue == 0).
644 
645     if (!partialCleavageValue)
646       {
647         if (m_calcOptions.monomerEntities() & MXT_MONOMER_CHEMENT_CROSS_LINK)
648           {
649             if (analyzeCrossLinks(&partialOligomerList) == -1)
650               {
651                 return false;
652               }
653           }
654       }
655 
656     // Finally, we can now perform the mass calculations and the
657     // ionization. We will use each oligomer in the oligomerList as a
658     // template for creating new oligomers(with different z values) and
659     // all the new oligomers will be appended to waitOligomerList. Each
660     // time a template oligomer will have been used, it will be removed
661     // from oligomerList. Once all the oligomers in oligomerList will
662     // have been used, and thus removed, all the newly allocated
663     // oligomers in waitOligomerList will be moved to oligomerList.
664 
665     OligomerList waitOligomerList;
666 
667     while (partialOligomerList.size())
668       {
669         CleaveOligomer *iterOligomer =
670           static_cast<CleaveOligomer *>(partialOligomerList.takeFirst());
671 
672         // We do not ask that the oligomer be ionized yet, because we
673         // have to first account for potential cleavage rules! Thus we
674         // pass an uninitialized ionization rule with IonizeRule().
675         // iterOligomer->calculateMasses(*mp_calcOptions,
676         // *mp_ionizeRule); This was a bug in the release versions up
677         // to 1.6.1.
678         iterOligomer->calculateMasses(&m_calcOptions);
679 
680         // At this point we should test if the oligomer has to be
681         // processed using cleavage rules.
682 
683         for(int jter = 0; jter < m_cleaveOptions.ruleList()->size(); ++jter)
684           {
685             //  Note that the accounting of the cleavage rule is
686             //  performed as if the oligomer was charged 1. This is why
687             //  we have to ionize the oligomer only after we have
688             //  completed the determination of its atomic composition.
689 
690             CleaveRule *cleaveRule = m_cleaveOptions.ruleList()->at(jter);
691 
692             // qDebug() << __FILE__ << __LINE__
693             //           << "Accounting for cleaverule:"
694             //           << cleaveRule->name();
695 
696             // qDebug() << __FILE__ << __LINE__
697             //           << "Oligomer mono mass before:" << iterOligomer->mono();
698 
699             if (!accountCleaveRule(cleaveRule, iterOligomer))
700               return -1;
701 
702             // 	    qDebug() << __FILE__ << __LINE__
703             // 		      << "Oligomer mono mass after:" << iterOligomer->mono();
704           }
705 
706         // At this point we can finally ionize the oligomer ! Remember
707         // that we have to ionize the oligomer as expected in the
708         // cleavage options. Because the ionization changes the values
709         // in the oligomer, and we need a new oligomer each time, we
710         // duplicate the oligomer each time we need it.
711 
712         int startIonizeLevel = m_cleaveOptions.startIonizeLevel();
713         int endIonizeLevel = m_cleaveOptions.endIonizeLevel() + 1;
714         IonizeRule ionizeRule(m_ionizeRule);
715 
716         for(int kter = startIonizeLevel; kter < endIonizeLevel; ++kter)
717           {
718             ionizeRule.setLevel(kter);
719 
720             CleaveOligomer *newOligomer = new CleaveOligomer(*iterOligomer);
721 
722             if (newOligomer->ionize(ionizeRule) == -1)
723               {
724                 delete newOligomer;
725 
726                 return -1;
727               }
728 
729             // The name was set already during the creation of the
730             // template oligomer. All we have to add to the name is the
731             // ionization level.
732 
733             QString name = iterOligomer->name() +
734               QString("#z=%3").arg(newOligomer->charge());
735 
736              newOligomer->setName(name);
737 
738             waitOligomerList.append(newOligomer);
739           }
740 
741         // We can delete the template oligomer that was already removed
742         // from the oligomerList(use of QList::takeFirst()).
743         delete iterOligomer;
744       }
745 
746     // At this point we should transfer all the oligomers from the
747     // waitOligomerList to the initial oligomerList.
748 
749     while (waitOligomerList.size())
750       {
751         Oligomer *iterOligomer = waitOligomerList.takeFirst();
752 
753         partialOligomerList.append(iterOligomer);
754       }
755 
756     // Finally transfer all the oligomers generated for this partial
757     // cleavage to the list of ALL the oligomers. But before making
758     // the transfert, compute the elemental composition and store it
759     // as a property object.
760 
761     int oligomerCount = partialOligomerList.size();
762 
763     while (partialOligomerList.size())
764       {
765         Oligomer *iterOligomer = partialOligomerList.takeFirst();
766 
767         // Elemental formula
768         QString *text = new QString(iterOligomer->elementalComposition());
769         StringProp *prop = new StringProp("ELEMENTAL_COMPOSITION", text);
770         iterOligomer->appendProp(static_cast<Prop *>(prop));
771 
772         mp_oligomerList->append(iterOligomer);
773       }
774 
775     return oligomerCount;
776   }
777 
778 
779   int
analyzeCrossLinks(OligomerList * oligomerList)780   Cleaver::analyzeCrossLinks(OligomerList *oligomerList)
781   {
782     Q_ASSERT(oligomerList);
783 
784     QList<Oligomer *> crossLinkedOligomerList;
785 
786     // General overview:
787 
788     // Iterate in the polymer's list of cross-links and for each
789     // cross-link find the oligomer that contains the first monomer
790     // involved in the cross-link. This first found oligomer should
791     // serve as a seed to pull-down all the oligomers cross-linked to
792     // it.
793 
794     const CrossLinkList &crossLinkList = mp_polymer->crossLinkList();
795 
796     for (int iter = 0; iter < crossLinkList.size(); ++iter)
797       {
798         CrossLink *crossLink = crossLinkList.at(iter);
799 
800         // With that crossLink, find an oligomer that encompasses the
801         // first monomer of the cross-link.
802 
803         const Monomer *firstMonomer = crossLink->firstMonomer();
804 
805         Q_ASSERT(firstMonomer);
806 
807         // What oligomer does encompass that monomer ?
808 
809         int foundIndex = 0;
810 
811         Oligomer *firstOligomer =
812           oligomerList->findOligomerEncompassing(firstMonomer, &foundIndex);
813 
814         if (firstOligomer)
815           {
816             // At this point we should turn this oligomer into a
817             // cross-linked oligomer, so that we can continue performing its
818             // cross-link analysis. To do that we allocate a list of
819             // oligomers for this cross-linked oligomer, were we'll store
820             // this first oligomer and then all the "pulled-down" oligomers.
821 
822             // Remove the cross-link from the main list of oligomers so
823             // that we do not stumble upon it in the next analysis
824             // steps.
825             oligomerList->removeAt(foundIndex);
826 
827             // Set the cross-linked oligomer apart.
828             crossLinkedOligomerList.append(firstOligomer);
829 
830             // Finally deeply scrutinize the oligomer.
831             analyzeCrossLinkedOligomer(firstOligomer, oligomerList);
832           }
833         else
834           {
835             // qDebug() << __FILE__ << __LINE__
836             //    << "Cross-link at index" << iter
837             //    << "did not find any oligomer for its first monomer "
838             // 	    "partner";
839           }
840       }
841 
842     // At this point we have terminated analyzing all the oligomers
843     // for the partial cleavage. All we have to do is move all the
844     // crossLinked oligomers from the crossLinkedOligomerList to
845     // oligomerList. While doing so make sure that the m_calcOptions
846     // datum has correct CoordinateList data, as these data will be
847     // required later, typically to calculate the elemental formula of
848     // the oligomer.
849 
850     while (crossLinkedOligomerList.size())
851       {
852         Oligomer *oligomer = crossLinkedOligomerList.takeAt(0);
853 
854         oligomer->updateCalcOptions();
855 
856         oligomerList->append(oligomer);
857       }
858 
859     crossLinkedOligomerList.clear();
860 
861     // Return the number of cross-linked/non-cross-linked oligomers
862     // alltogether.
863 
864     return oligomerList->size();
865   }
866 
867 
868   int
analyzeCrossLinkedOligomer(Oligomer * oligomer,OligomerList * oligomerList)869   Cleaver::analyzeCrossLinkedOligomer(Oligomer *oligomer,
870                                       OligomerList *oligomerList)
871   {
872     Q_ASSERT(oligomer);
873     Q_ASSERT(oligomerList);
874 
875     const CrossLinkList &crossLinkList =  mp_polymer->crossLinkList();
876 
877     OligomerList clearanceOligomerList;
878 
879     // 'oligomer' is the first oligomer in the cross-link series of
880     // oligomers. It is the "seeding" oligomer with which to pull-down
881     // all the others. Prepend to its name the "cl-" string to let it
882     // know it is cross-linked.
883 
884     QString name = oligomer->name();
885     name.prepend("cl-");
886     oligomer->setName(name);
887 
888     // Iterate in the 'oligomer' and for each monomer get any
889     // cross-linked oligomer out of the list of cross-links.
890 
891     for (int iter = oligomer->startIndex();
892          iter < oligomer->endIndex() + 1; ++iter)
893       {
894         const Monomer *monomer = mp_polymer->at(iter);
895 
896         // What crossLinks do involve this monomer ?
897 
898         QList<int> crossLinkIndices;
899 
900         int ret =
901           crossLinkList.crossLinksInvolvingMonomer(monomer,
902                                                    &crossLinkIndices);
903 
904         if (ret)
905           {
906             // At least one cross-link involves the monomer currently
907             // iterated in the oligomer being analysed.
908 
909             int index = 0;
910 
911             foreach(index, crossLinkIndices)
912               {
913                 CrossLink *crossLink = crossLinkList.at(index);
914 
915                 // 	      qDebug() << __FILE__ << __LINE__
916                 // 			<< crossLink->name();
917 
918                 // First off, we can add the cross-link to the list of
919                 // cross-links of the oligomer(we'll need them to be
920                 // able to perform mass calculations). Note that this is
921                 // only copying the pointer to the actual cross-link in
922                 // the polymer's list of cross-links. Note also that a
923                 // cross-link might not be found more than once(the
924                 // call below first checks that the cross-link is not
925                 // already in the list).
926 
927                 if (!oligomer->addCrossLink(crossLink))
928                   {
929                     //  qDebug() << __FILE__ << __LINE__
930                     //	    << "The cross-link:"
931                     //	    << crossLink->name()
932                     //	    << "was already in the"
933                     //	    << oligomer
934                     //	    << "oligomer's list of cross-links: "
935                     // 		    "not duplicated.";
936                   }
937                 else
938                   {
939                     //  qDebug() << __FILE__ << __LINE__
940                     //	    << "The cross-link:"
941                     //	    << crossLink->name()
942                     //	    << "was added to the"
943                     //	    << oligomer
944                     //	    << "oligomer's list of cross-links.";
945                   }
946 
947                 const Monomer *iterMonomer = 0;
948 
949                 foreach(iterMonomer, *(crossLink->monomerList()))
950                   {
951                     // 	 qDebug() << __FILE__ << __LINE__
952                     // 	    << iterMonomer->name();
953 
954                     int foundIndex = 0;
955 
956                     Oligomer *foundOligomer =
957                       oligomerList->findOligomerEncompassing(iterMonomer,
958                                                              &foundIndex);
959 
960                     if (foundOligomer)
961                       {
962                         // qDebug() << __FILE__ << __LINE__
963                         // 	<< foundOligomer->name() << foundIndex;
964 
965                         // One oligomer in the original oligomer list
966                         // encompasses a monomer that seems to be
967                         // cross-linked to the 'monomer' being iterated
968                         // in in the currently analyzed oligomer. Move
969                         // that oligomer to the clearance list of
970                         // oligomer that will need to be further
971                         // analyzed later.
972 
973                         oligomerList->removeAt(foundIndex);
974 
975                         clearanceOligomerList.append(foundOligomer);
976 
977                         // Update the name of the oligomer with the name
978                         // of the new foundOligomer.
979 
980                         QString name = QString("%1+%2")
981                           .arg(oligomer->name())
982                           .arg(foundOligomer->name());
983                         oligomer->setName(name);
984                       }
985                   }
986               }
987             // End of
988             // foreach(index, crossLinkIndices)
989           }
990       }
991 
992     // At this point we have one oligomer which we know is cross-linked
993     // at least once(with another oligomer or the cross-link is between
994     // two or more monomers in the same oligomer, think cyan fluorescent
995     // protein). If monomers in that same oligomer were cross-linked to
996     // other monomers in other oligomers, then these oligomers should by
997     // now have been moved from the original list of oligomers
998     //(oligomerList) to the clearance list of oligomers
999     //(clearanceOligomerList). We have to iterate in each oligomer of that
1000     // clearance list and for each of its monomers, check if it has a
1001     // cross-link to any oligomer still in the original oligomerList
1002     //(this is what I call "pull-down" stuff). Found oligomers are
1003     // appended to the clearanceOligomerList.
1004 
1005     while (clearanceOligomerList.size())
1006       {
1007         Oligomer *iterOligomer = clearanceOligomerList.first();
1008 
1009         for(int iter = iterOligomer->startIndex();
1010             iter < iterOligomer->endIndex() + 1; ++iter)
1011           {
1012             const Monomer *monomer = mp_polymer->at(iter);
1013 
1014             // 	  qDebug() << __FILE__ << __LINE__
1015             // 		    << monomer->name();
1016 
1017             // What crossLinks do involve this monomer ?
1018 
1019             QList<int> crossLinkIndices;
1020 
1021             int ret =
1022               crossLinkList.crossLinksInvolvingMonomer(monomer,
1023                                                        &crossLinkIndices);
1024 
1025             if (ret)
1026               {
1027                 // At least one cross-link involves the monomer currently
1028                 // iterated in the iterOligomer being analysed.
1029 
1030                 int index = 0;
1031 
1032                 foreach(index, crossLinkIndices)
1033                   {
1034                     CrossLink *crossLink = crossLinkList.at(index);
1035 
1036                     // 		  qDebug() << __FILE__ << __LINE__
1037                     // 			    << crossLink->name();
1038 
1039                     // First off, we can add the cross-link to the list of
1040                     // cross-links of the oligomer(we'll need them to be
1041                     // able to perform mass calculations). Note that this is
1042                     // only copying the pointer to the actual cross-link in
1043                     // the polymer's list of cross-links. Note also that a
1044                     // cross-link might not be found more than once(the
1045                     // call below first checks that the cross-link is not
1046                     // already in the list).
1047 
1048                     if (!oligomer->addCrossLink(crossLink))
1049                       {
1050                         // qDebug() << __FILE__ << __LINE__
1051                         // << "The cross-link:"
1052                         // << crossLink->name()
1053                         // << "was already in the"
1054                         // << oligomer
1055                         // << "oligomer's list of cross-links: "
1056                         // "not duplicated.";
1057                       }
1058                     else
1059                       {
1060                         // qDebug() << __FILE__ << __LINE__
1061                         // << "The cross-link:"
1062                         // << crossLink->name()
1063                         // << "was added to the"
1064                         // << oligomer
1065                         // << "oligomer's list of cross-links.";
1066                       }
1067 
1068                     const Monomer *iterMonomer = 0;
1069 
1070                     foreach(iterMonomer, *(crossLink->monomerList()))
1071                       {
1072                         // qDebug() << __FILE__ << __LINE__
1073                         //	<< iterMonomer->name();
1074 
1075                         int foundIndex = 0;
1076 
1077                         Oligomer *foundOligomer =
1078                           oligomerList->findOligomerEncompassing(iterMonomer,
1079                                                                  &foundIndex);
1080 
1081                         if (foundOligomer)
1082                           {
1083                             // qDebug() << __FILE__ << __LINE__
1084                             //          << foundOligomer->name() << foundIndex;
1085 
1086                             // One oligomer in the original oligomer list
1087                             // encompasses a monomer that seems to be
1088                             // cross-linked to the 'monomer' being iterated
1089                             // in in the currently analyzed oligomer. Move
1090                             // that oligomer to the clearance list of
1091                             // oligomer that will need to be further
1092                             // analyzed later.
1093 
1094                             oligomerList->removeAt(foundIndex);
1095 
1096                             clearanceOligomerList.append(foundOligomer);
1097 
1098                             // Update the name of the oligomer with the name
1099                             // of the new foundOligomer.
1100 
1101                             QString name = QString("%1+%2")
1102                               .arg(oligomer->name())
1103                               .arg(foundOligomer->name());
1104                             oligomer->setName(name);
1105                           }
1106                       }
1107                   }
1108                 // End of
1109                 // foreach(index, crossLinkIndices)
1110               }
1111             // End of(ret) ie cross-links involved monomer
1112           }
1113         // End of
1114         //   for (int iter = iterOligomer->startIndex();
1115         //   iter < iterOligomer->endIndex() + 1; ++iter)
1116 
1117         // At this point this quarantinized oligomer might be removed
1118         // from the clearance clearanceOligomerList and its coordinates
1119         // be appended to the 'oligomer' list of coordinates. Then, the
1120         // quanrantinized oligomer might be destroyed.
1121 
1122         clearanceOligomerList.removeFirst();
1123 
1124         oligomer->appendCoordinates(iterOligomer);
1125 
1126         delete iterOligomer;
1127       }
1128 
1129     // At this point, all the oligomers in the clearance oligomer list
1130     // have all been dealt with, return the number of cross-linked
1131     // oligomers in this oligomer.
1132 
1133     //   for (int iter = 0; iter < oligomer->crossLinkList()->size(); ++iter)
1134     //     qDebug() << __FILE__ << __LINE__
1135     // 	      << "Finished for oligomer:" <<
1136     //       oligomer->crossLinkList()->at(iter)->name();
1137 
1138 
1139     return static_cast<CoordinateList *>(oligomer)->size();
1140   }
1141 
1142 
1143   void
emptyOligomerList()1144   Cleaver::emptyOligomerList()
1145   {
1146     while (mp_oligomerList->size())
1147       {
1148         delete mp_oligomerList->takeFirst();
1149       }
1150   }
1151 
1152 } // namespace massXpert
1153