1 /* $Id: msms.hpp 575325 2018-11-27 18:22:00Z ucko $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the authors in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Lewis Y. Geer
27  *
28  * File Description:
29  *    Helper classes for ms search algorithms
30  *
31  * ===========================================================================
32  */
33 
34 #ifndef MSMS__HPP
35 #define MSMS__HPP
36 
37 #ifdef WIN32
38 #pragma warning(disable:4786)
39 #endif
40 
41 #include <list>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <set>
46 #include <deque>
47 #include <map>
48 #include <objects/omssa/MSModSpecSet.hpp>
49 #include <objects/omssa/MSSearchSettings.hpp>
50 #include <util/sequtil/sequtil_convert.hpp>
51 #include "Mod.hpp"
52 #include "SpectrumSet.hpp"
53 
54 // #include <corelib/ncbistd.hpp>
55 
56 BEGIN_NCBI_SCOPE
57 BEGIN_SCOPE(objects)
58 BEGIN_SCOPE(omssa)
59 
60 
// Number of distinct (non-redundant) integer mass intervals of the amino
// acids; equals the length of the disabled AAIntervals table further down.
const int kNumAAIntervals = 19;

// ABCXYZ ion mass calculation constants.  See Papayannopoulos, pg 63.

/** mass of water */
const double kWater = 18.010565;

/** neutron mass */
const double kNeutron = 1.008664904;

// const double AAAbundance[] = {1.0, 0.0758, 1.0, 0.0167, 0.0528, 0.0635, 0.0408, 0.0683, 0.0224, 0.058, 0.0593, 0.0943, 0.0237, 0.0447, 0.0491, 0.0399, 0.0514, 0.0715, 0.0569, 0.0656, 0.0124, 1.0, 0.0318, 1.0, 1.0, 1.0, 0.0};

// Residue masses taken from Papayannopoulos, IA, Mass Spectrometry Reviews,
// 1995, 14, 49-73.  The selenocysteine value was derived from the cysteine
// mass plus the Se/S difference from webelements.
// NOTE(review): the three tables below hold 29 entries each and are
// presumably indexed by amino acid number -- confirm against the callers.

// monoisotopic mass
const double MonoMass[] = {
    0.0, 71.03711, 0.0, 103.00919, 115.02694, 129.04259, 147.06841,
    57.02147, 137.05891, 113.08406, 128.09496, 113.08406, 131.04049,
    114.04293, 97.05276, 128.05858, 156.10111, 87.03203, 101.04768,
    99.06841, 186.07931, 0.0, 163.06333, 0.0, 149.903 , 0.0, 113.08406,
    237.14776, 0.0
};

// average mass
const double AverageMass[] = {
    0.0, 71.08, 0.0, 103.15, 115.09, 129.12, 147.18,
    57.05, 137.14, 113.16, 128.17, 113.16, 131.20,
    114.10, 97.12, 128.13, 156.19, 87.08, 101.11,
    99.13, 186.21, 0.0, 163.18, 0.0, 150.044, 0.0, 113.16,
    237.30, 0.0
};

// n15 enriched monoisotopic mass
const double MonoN15Mass[] = {
    0.0, 72.034144893, 0.0, 104.006224893, 116.023974893, 130.039624893,
    148.065444893, 58.018494893, 140.050014679, 114.081094893,
    130.089029786, 114.081094893, 132.037524893, 116.036999786,
    98.049794893, 130.052649786, 160.089239572, 88.029064893,
    102.044714893, 100.065444893, 188.073379786, 0.0, 164.060364893, 0.0,
    150.8964, 0.0, 114.081094893, 240.1388649, 0.0
};


// const int AAIntervals[] = { 57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 150, 156, 163, 186 };


// Terminal and ion-type mass offsets, 13 entries each.
// NOTE(review): presumably indexed by ion series type -- confirm.
const double kTermMass[] = {
    1.007825, 1.007825, 1.007825, 17.00274, 17.00274, 17.00274,
    0, 0, 0, 0,
    1.007825, 17.00274, 1.007825
};
const double kIonTypeMass[] = {
    -27.994915, 0.0, 17.02655, 27.994915, 2.01565, -14.003075,
    0, 0, 0, 0,
    -26.98709, -15.9994, -70.981405
};
89 
90 /////////////////////////////////////////////////////////////////////////////
91 //
92 //  CMassArray::
93 //
94 //  Holds AA indexed mass array
95 //
96 
97 class NCBI_XOMSSA_EXPORT CMassArray {
98 public:
CMassArray(void)99     CMassArray(void) {};
100 
101     const double * const GetMass(void) const;
102     const int * const GetIntMass(void) const;
103 
104     //! initialize mass arrays with fixed mods
105     void Init(const CMSSearchSettings::TProductsearchtype &SearchType);
106     // initialize mass arrays with fixed mods
107     void Init(const CMSMod &Mods,
108 	      const CMSSearchSettings::TProductsearchtype &SearchType,
109               CRef <CMSModSpecSet> Modset);
110 private:
111     // inits mass arrays
112     void x_Init(const CMSSearchSettings::TProductsearchtype &SearchType);
113     // masses as doubles
114     double CalcMass[kNumUniqueAA];
115     // mass in scaled integer Daltons
116     int IntCalcMass[kNumUniqueAA];
117     // Se mass is 78.96, S is 32.066
118 };
119 
120 ///////////////////   CMassArray inline methods
121 
GetMass(void) const122 inline const double * const CMassArray::GetMass(void) const
123 {
124     return CalcMass;
125 }
126 
GetIntMass(void) const127 inline const int * const CMassArray::GetIntMass(void) const
128 {
129     return IntCalcMass;
130 }
131 
132 /////////////////// end of CMassArray inline methods
133 
134 
135 /////////////////////////////////////////////////////////////////////////////
136 //
137 //  CAA::
138 //
139 //  lookup table for AA index
140 //
141 
142 // lookup table for reversing an AA character to AA number
143 class NCBI_XOMSSA_EXPORT CAA {
144 public:
145     CAA(void);
146 
147     /**
148      * return the map for translating AA char to AA number
149      */
150     const char * const GetMap(void) const;
151 
152 private:
153     char AAMap[256];
154 };
155 
156 
157 ///////////////////    CAA inline methods
158 
159 inline
GetMap(void) const160 const char * const CAA::GetMap(void) const
161 {
162     return AAMap;
163 }
164 
165 /////////////////// end of CAA inline methods
166 
167 /**
168  * contains information for a post translational modification
169  * at a particular sequence site
170  */
171 
172 class NCBI_XOMSSA_EXPORT CMod {
173 public:
174 
175     /**
176      * type for a site on a sequence
177      */
178 
179     typedef const char * TSite;
180 
181     /**
182      *  type for masses
183      */
184     typedef int TMass;
185 
186     /**
187      * what is the type of the mod?
188      */
189     typedef int TEnum;
190 
191     /**
192      * is the mod fixed?
193      */
194     typedef int TFixed;
195 
196     /**
197      * default constructor
198      */
199     CMod(void);
200 
201     /**
202      * copy constructor
203      */
204     CMod(const CMod &Old);
205 
206     /**
207      * assignment
208      */
209     const CMod& operator= (const CMod& rhs);
210 
211     /**
212      * reset to default values
213      */
214     void Reset(void);
215 
216     /**
217      * Get the site position
218      */
219     TSite GetSite(void) const;
220 
221     /**
222      * Set the site postion
223      */
224     TSite& SetSite(void);
225 
226     /**
227      * Get the mass to be added to the precursor mass
228      */
229     TMass GetPrecursorDelta(void) const;
230 
231     /**
232      * Set the site postion
233      */
234     TMass& SetPrecursorDelta(void);
235 
236     /**
237      * Get the mass to be added to the product mass
238      */
239     TMass GetProductDelta(void) const;
240 
241     /**
242      * Set the site postion
243      */
244     TMass& SetProductDelta(void);
245 
246     /**
247      * Get mod type
248      */
249     TEnum GetEnum(void) const;
250 
251     /**
252      * Set the mod type
253      */
254     TEnum& SetEnum(void);
255 
256     /**
257      * Is the mod fixed?
258      */
259     TFixed GetFixed(void) const;
260 
261     /**
262      * set mod state (1 = fixed)
263      */
264     TFixed& SetFixed(void);
265 
266 private:
267 	/**
268      *  the position within the peptide of a variable modification
269      */
270 	const char *Site;
271 
272 	/**
273      *  the modification mass for the precursor
274      */
275 	int PrecursorDelta;
276 
277     /**
278      *  the modification mass for the product
279      */
280     int ProductDelta;
281 
282 	/**
283      *  the modification type (used for saving for output)
284      */
285 	int ModEnum;
286 
287 	/**
288      *  track fixed mods, 1 == fixed
289      */
290 	int IsFixed;
291 };
292 
293 /**
294  * default constructor
295  */
296 inline
CMod(void)297 CMod::CMod(void)
298 {
299     Reset();
300 }
301 
302 /**
303  * reset to default values
304  */
305 inline
Reset(void)306 void CMod::Reset(void)
307 {
308     Site = (const char *)-1;
309     PrecursorDelta = 0;
310     ProductDelta = 0;
311     ModEnum = 0;
312     IsFixed = 0;
313 }
314 
315 /**
316  * copy constructor
317  */
318 inline
CMod(const CMod & Old)319 CMod::CMod(const CMod &Old)
320 {
321     *this = Old;
322 }
323 
324 /**
325  * assignment
326  */
327 inline
operator =(const CMod & rhs)328 const CMod& CMod::operator= (const CMod& rhs)
329 {
330     Site = rhs.Site;
331     PrecursorDelta = rhs.PrecursorDelta;
332     ProductDelta = rhs.ProductDelta;
333     ModEnum = rhs.ModEnum;
334     IsFixed = rhs.IsFixed;
335 
336     return *this;
337 }
338 
339 /**
340  * Get the site position
341  */
342 inline
GetSite(void) const343 CMod::TSite CMod::GetSite(void) const
344 {
345     return Site;
346 }
347 
348 /**
349  * Set the site postion
350  */
351 inline
SetSite(void)352 CMod::TSite& CMod::SetSite(void)
353 {
354     return Site;
355 }
356 
357 /**
358  * Get the mass to be added to the precursor mass
359  */
360 inline
GetPrecursorDelta(void) const361 CMod::TMass CMod::GetPrecursorDelta(void) const
362 {
363     return PrecursorDelta;
364 }
365 
366 /**
367  * Set the site postion
368  */
369 inline
SetPrecursorDelta(void)370 CMod::TMass& CMod::SetPrecursorDelta(void)
371 {
372     return PrecursorDelta;
373 }
374 
375 /**
376  * Get the mass to be added to the product mass
377  */
378 inline
GetProductDelta(void) const379 CMod::TMass CMod::GetProductDelta(void) const
380 {
381     return ProductDelta;
382 }
383 
384 /**
385  * Set the site postion
386  */
387 inline
SetProductDelta(void)388 CMod::TMass& CMod::SetProductDelta(void)
389 {
390     return ProductDelta;
391 }
392 
393 /**
394  * Get mod type
395  */
396 inline
GetEnum(void) const397 CMod::TEnum CMod::GetEnum(void) const
398 {
399     return ModEnum;
400 }
401 
402 /**
403  * Set the mod type
404  */
405 inline
SetEnum(void)406 CMod::TEnum& CMod::SetEnum(void)
407 {
408     return ModEnum;
409 }
410 
411 /**
412  * Is the mod fixed?
413  */
414 inline
GetFixed(void) const415 CMod::TFixed CMod::GetFixed(void) const
416 {
417     return IsFixed;
418 }
419 
420 /**
421  * set mod state (1 = fixed)
422  */
423 inline
SetFixed(void)424 CMod::TFixed& CMod::SetFixed(void)
425 {
426     return IsFixed;
427 }
428 
429 
430 
431 /**
432  * generic exception class for omssa
433  */
434 
435 class COMSSAException: EXCEPTION_VIRTUAL_BASE public CException {
436     public:
437     /// Error types that subsystem can generate.
438     enum EErrCode {
439         eMSParseException,		///< unable to parse COMSSASearch
440         eMSNoMatchException,	///< unmatched sequence library
441         eMSLadderNotFound	    ///< ladder not found in CLadderContainer
442     };
443 
444     /// Translate from the error code value to its string representation.
GetErrCodeString(void) const445     virtual const char* GetErrCodeString(void) const override
446     {
447         switch (GetErrCode()) {
448         case eMSParseException: return "unable to parse COMSSASearch";
449         case eMSNoMatchException: return "unmatched sequence library";
450         case eMSLadderNotFound: return "ladder not found in CLadderContainer";
451         default:     return CException::GetErrCodeString();
452         }
453     }
454 
455     // Standard exception boilerplate code.
456     NCBI_EXCEPTION_DEFAULT(COMSSAException, CException);
457 };
458 
459 
460 /////////////////////////////////////////////////////////////////////////////
461 //
462 //  CCleave::
463 //
464 //  Classes for cleaving sequences quickly and computing masses
465 //
466 
467 typedef std::deque <int> TCleave;
468 
469 class NCBI_XOMSSA_EXPORT CCleave : public CObject {
470 public:
471     CCleave(void);
472 
473     /**
474      * cleaves the sequence.  Note that output is 0 and the positions
475      * of the aa's to be cleaved.  Should be interpreted as [0, pos1],
476      * (pos1, pos2], ..., (posn, end].  This weirdness is historical --
477      * the C++ string class uses an identifier for end-of-string and has
478      * no identifier for before start of string.
479      *
480      * @param SeqStart pointer to start of sequence
481      * @param SeqEnd pointer to end of sequence
482      * @param PepStart ** to the start of peptide
483      * @param Masses cumulative masses of peptides
484      * @param NumMod number of variable mods
485      * @param MaxNumMod upper bound on number of variable mods
486      * @param EndMass the end masses of the peptides
487      * @param VariableMods list of variable mods
488      * @param FixedMods list of fixed modifications
489      * @param ModList mod site info
490      * @param IntCalcMass integer AA masses
491      * @param PrecursorIntCalcMass integer precursor masses
492      * @param Modset list of possible mods
493      * @param Maxproductions max number of product ions to calculate
494      *
495      * @return are we at the end of the sequence?
496      */
497     bool CalcAndCut(const char *SeqStart,
498                     const char *SeqEnd,  // the end, not beyond the end
499                     const char **PepStart,  // return value
500                     int *Masses,  // Masses, indexed by miss cleav, mods
501                     int& NumMod,   // num Mods
502                     int MaxNumMod, // max num mods
503                     int *EndMasses,
504                     CMSMod &VariableMods,
505                     CMSMod &FixedMods,
506                     CMod ModList[],
507                     const int *IntCalcMass,  // array of int AA masses
508                     const int *PrecursorIntCalcMass, // precursor masses
509                     CRef <CMSModSpecSet> &Modset,
510                     int Maxproductions
511                     );
512 
513 
514     /**
515      *  Check to see if we are at a cleavage point
516      *  Used by CalcAndCut
517      *
518      * @param iPepStart pointer to location of sequence cursor
519      * @param iSeqStart points to start of the sequence
520      */
521 
522     bool CheckCleave(const char *iPepStart, const char *iSeqStart);
523 
524 
525     /**
526      * is the character given one of the cleavage chars?
527      *
528      * @param iPepStart position in the sequence
529      *
530      */
531     bool CheckCleaveChar(const char *iPepStart) const;
532 
533 
534     void CalcMass(char SeqChar,
535 		  int *Masses,
536 		  const int *IntCalcMass
537 		  );
538 
539     void EndMass(int *Masses
540 		 );
541 
542     int findfirst(char* Seq, int Pos, int SeqLen);
543 
544     ///
545     /// looks for non-specific ptms
546     ///
547     void CheckNonSpecificMods(EMSModType ModType, // the type of mod
548                               CMSMod &VariableMods, // list of mods to look for
549                               int& NumMod, // number of mods applied to peptide
550                               int MaxNumMod,  // maximum mods for a peptide
551                               CMod ModList[],  // list of mod sites
552                               const char *iPepStart, // position in protein
553                               bool setfixed,
554                               CRef <CMSModSpecSet> &Modset
555                   );
556 
557     ///
558     /// looks for amino acid specific ptms
559     ///
560     void CheckAAMods(EMSModType ModType, // the type of mod
561                      CMSMod &VariableMods, // list of mods to look for
562                      int& NumMod, // number of mods applied to peptide
563                      char SeqChar,  // the amino acid
564                      int MaxNumMod,  // maximum mods for a peptide
565                      CMod ModList[],  // list of mod sites
566                      const char *iPepStart,  // position in protein
567                      bool setfixed,
568                      CRef <CMSModSpecSet> &Modset
569              );
570 
571     /**
572      * checks all mods for a particular type
573      */
574     void CheckMods(EMSModType NonSpecificIn, EMSModType Specific,
575                    CMSMod &VariableMods, CMSMod &FixedMods,
576 				   int& NumMod, char SeqChar, int MaxNumMod,
577 				   CMod ModList[],
578 				   const char *iPepStart,
579                    CRef <CMSModSpecSet> &Modset);
580 
581     /**
582      * Is the enzyme really a top-down search?
583      */
584     bool GetTopDown(void) const;
585 
586     /**
587      * Get the enzyme stop value
588      */
589     const char * GetStop(void) const;
590 
591     /**
592      * Set the enzyme stop value
593      */
594     const char * & SetStop(void);
595 
596     /**
597      * Is this a non-specific search?
598      */
599     bool GetNonSpecific(void) const;
600 
601     /**
602       * Get the number of cleavage chars
603       */
604     int GetCleaveNum(void) const;
605 
606     /**
607      * Get the the cleave offset, 0 = cterm, 1 = nterm
608      */
609     const char * GetCleaveOffset(void) const;
610 
611     /**
612      * Is there n-term methionine cleavage?
613      */
614     bool GetNMethionine(void) const;
615 
616     /**
617      * Set n-term methionine cleavage
618      */
619     bool& SetNMethionine(void);
620 
621     const string GetCleaveAt(void) const;
622 
623     bool GetCheckProline(void) const;
624 
625     const char * GetCleaveSense(void) const;
626 
627 protected:
628     int ProtonMass; // mass of the proton
629     int TermMass;  // mass of h2o
630     CAA ReverseAA;
631 
632     /**
633      *  where to cleave.  last two letters are in readdb format, assuming
634      * it uses the UniqueAA alphabet
635      */
636     const char *CleaveAt;
637 
638     /**
639      *  what is the cleavage offset
640      */
641     const char *CleaveOffset;
642 
643     /**
644      *  How many cleavage characters
645      */
646     int kCleave;
647 
648     /**
649      * TopDown
650      * does this signify a top-down search
651      */
652     bool TopDown;
653 
654     /**
655      * Stop
656      * Stop position for no-enzyme and semi-tryptic searches
657      */
658     const char *Stop;
659 
660     /**
661      * Is this a non-specific search?
662      */
663     bool NonSpecific;
664 
665     /**
666      * Should we apply the proline rule (no cleavage before proline)
667      */
668     bool CheckProline;
669 
670     /**
671      * n-terminal methionine cleavage
672      */
673     bool NMethionine;
674 };
675 
676 
677 ///////////////////    CCleave inline methods
678 
679 /**
680  * What are the cleavage chars?
681  *
682  */
683 inline
GetCleaveAt(void) const684 const string CCleave::GetCleaveAt(void) const
685 {
686     string out;
687     CSeqConvert::Convert(CleaveAt, CSeqUtil::e_Ncbistdaa, 0, kCleave, out, CSeqUtil::e_Ncbieaa);
688     return out;
689 }
690 
691 /**
692  * Should we check for proline?
693  *
694  */
695 inline
GetCheckProline(void) const696 bool CCleave::GetCheckProline(void) const
697 {
698     return CheckProline;
699 }
700 
701 /**
702  * Should we check for proline?
703  *
704  */
705 inline
GetCleaveSense(void) const706 const char * CCleave::GetCleaveSense(void) const
707 {
708     if (CleaveOffset[0] == 0) return "C";
709     else return "N";
710 }
711 
712 
713 /**
714  * is the character given one of the cleavage chars?
715  *
716  * @param iPepStart position in the sequence
717  *
718  */
719 inline
CheckCleaveChar(const char * iPepStart) const720 bool CCleave::CheckCleaveChar(const char *iPepStart) const
721 {
722     int j;
723     for(j = 0; j < kCleave; j++)
724         if(*(iPepStart + CleaveOffset[j]) == CleaveAt[j]) return true;
725     return false;
726 }
727 
728 
729 /**
730  *  Check to see if we are at a cleavage point
731  *  Used by CalcAndCut
732  *
733  * @param iPepStart pointer to location of sequence cursor
734  */
735 inline
CheckCleave(const char * iPepStart,const char * iSeqStart)736 bool CCleave::CheckCleave(const char *iPepStart, const char *iSeqStart)
737 {
738     // methionine cleavage
739     // (allowed even if TopDown or NonSpecific)
740     if(iPepStart == iSeqStart && NMethionine && *(iPepStart) == '\x0c') {
741         return true;
742     }
743 
744     if(TopDown) return false; // todo: methionine cleavage allowed
745 
746     if(NonSpecific) {
747         if(iPepStart == GetStop()) return true;
748         return false; // todo: methionine cleavage allowed
749     }
750 
751     // check specific cleave amino acids
752     if(CheckCleaveChar(iPepStart)) {
753         if(CheckProline && *(iPepStart+1) == '\x0e' )
754             return false;  // not before proline
755         return true;
756     }
757     return false;
758 }
759 
760 
761 inline
CalcMass(char SeqChar,int * Masses,const int * IntCalcMass)762 void CCleave::CalcMass(char SeqChar,
763 		       int *Masses,
764 		       const int *IntCalcMass
765 		       )
766 {
767     *Masses += IntCalcMass[ReverseAA.GetMap()[SeqChar]];
768 }
769 
770 
771 inline
EndMass(int * EndMasses)772 void CCleave::EndMass( int *EndMasses
773 		       )
774 {
775     *EndMasses = TermMass;
776 }
777 
778 
779 inline
CheckAAMods(EMSModType ModType,CMSMod & VariableMods,int & NumMod,char SeqChar,int MaxNumMod,CMod ModList[],const char * iPepStart,bool setfixed,CRef<CMSModSpecSet> & Modset)780 void CCleave::CheckAAMods(EMSModType ModType, CMSMod &VariableMods, int& NumMod,
781                           char SeqChar, int MaxNumMod, CMod ModList[],
782                           const char *iPepStart,
783                           bool setfixed,
784                           CRef <CMSModSpecSet> &Modset)
785 {
786     // iterator thru mods VariableMods.GetAAMods(ModType)
787     size_t iMods;
788     int iChar;
789 
790     for (iMods = 0;
791         iMods <  VariableMods.GetAAMods(ModType).size(); ++iMods) {
792         for (iChar = 0; iChar < Modset->GetModNumChars(VariableMods.GetAAMods(ModType)[iMods]); ++iChar) {
793             if (SeqChar == Modset->GetModChar(VariableMods.GetAAMods(ModType)[iMods], iChar) && NumMod < MaxNumMod) {
794                 ModList[NumMod].SetSite() = iPepStart;
795                 ModList[NumMod].SetPrecursorDelta() = Modset->GetModMass(VariableMods.GetAAMods(ModType)[iMods]);
796                 ModList[NumMod].SetProductDelta() = Modset->GetNeutralLoss(VariableMods.GetAAMods(ModType)[iMods]);
797                 ModList[NumMod].SetEnum() = VariableMods.GetAAMods(ModType)[iMods];
798                 if (setfixed) ModList[NumMod].SetFixed() = 1;
799                 else ModList[NumMod].SetFixed() = 0;
800                 NumMod++;
801             }
802         }
803     }
804 }
805 
806 
807 inline
CheckNonSpecificMods(EMSModType ModType,CMSMod & VariableMods,int & NumMod,int MaxNumMod,CMod ModList[],const char * iPepStart,bool setfixed,CRef<CMSModSpecSet> & Modset)808 void CCleave::CheckNonSpecificMods(EMSModType ModType, CMSMod &VariableMods,
809                                    int& NumMod, int MaxNumMod,
810                                    CMod ModList[],
811                                    const char *iPepStart,
812                                    bool setfixed,
813                                    CRef <CMSModSpecSet> &Modset)
814 {
815     // iterator thru mods
816     size_t iMods;
817 
818     for (iMods = 0;
819         iMods <  VariableMods.GetAAMods(ModType).size(); ++iMods) {
820         if (NumMod < MaxNumMod) {
821             ModList[NumMod].SetSite() = iPepStart;
822             ModList[NumMod].SetPrecursorDelta() = Modset->GetModMass(VariableMods.GetAAMods(ModType)[iMods]);
823             ModList[NumMod].SetProductDelta() = Modset->GetNeutralLoss(VariableMods.GetAAMods(ModType)[iMods]);
824             ModList[NumMod].SetEnum() = VariableMods.GetAAMods(ModType)[iMods];
825             if (setfixed) ModList[NumMod].SetFixed() = 1;
826             else  ModList[NumMod].SetFixed() = 0;
827             NumMod++;
828         }
829     }
830 }
831 
832 inline
CheckMods(EMSModType NonSpecificIn,EMSModType Specific,CMSMod & VariableMods,CMSMod & FixedMods,int & NumMod,char SeqChar,int MaxNumMod,CMod ModList[],const char * iPepStart,CRef<CMSModSpecSet> & Modset)833 void CCleave::CheckMods(EMSModType NonSpecificIn, EMSModType Specific,
834                         CMSMod &VariableMods, CMSMod &FixedMods,
835                         int& NumMod, char SeqChar, int MaxNumMod,
836                         CMod ModList[],
837                         const char *iPepStart,
838                         CRef <CMSModSpecSet> &Modset)
839 {
840     // check non-specific mods
841     CheckNonSpecificMods(NonSpecificIn, VariableMods, NumMod, MaxNumMod, ModList,
842                          iPepStart, false, Modset);
843     CheckNonSpecificMods(NonSpecificIn, FixedMods, NumMod, MaxNumMod, ModList,
844                          iPepStart, true, Modset);
845     // check specific mods
846     CheckAAMods(Specific, VariableMods, NumMod, SeqChar, MaxNumMod, ModList,
847                 iPepStart, false, Modset);
848     // fix
849     CheckAAMods(Specific, FixedMods, NumMod, SeqChar, MaxNumMod, ModList,
850                 iPepStart, true, Modset);
851 }
852 
853 inline
GetTopDown(void) const854 bool CCleave::GetTopDown(void) const
855 {
856     return TopDown;
857 }
858 
859 inline
GetNonSpecific(void) const860 bool CCleave::GetNonSpecific(void) const
861 {
862     return NonSpecific;
863 }
864 
865 inline
GetStop(void) const866 const char * CCleave::GetStop(void) const
867 {
868     return Stop;
869 }
870 
871 inline
SetStop(void)872 const char * & CCleave::SetStop(void)
873 {
874     return Stop;
875 }
876 
877 inline
GetCleaveNum(void) const878 int CCleave::GetCleaveNum(void) const
879 {
880     return kCleave;
881 }
882 
883 inline
GetCleaveOffset(void) const884 const char * CCleave::GetCleaveOffset(void) const
885 {
886     return CleaveOffset;
887 }
888 
889 inline
GetNMethionine(void) const890 bool CCleave::GetNMethionine(void) const
891 {
892     return NMethionine;
893 }
894 
895 
896 inline
SetNMethionine(void)897 bool& CCleave::SetNMethionine(void)
898 {
899     return NMethionine;
900 }
901 
902 
903 /////////////////// end of CCleave inline methods
904 
905 
906 
907 class NCBI_XOMSSA_EXPORT CCNBr: public CCleave {
908 public:
909     CCNBr(void);
910 };
911 
912 
913 class NCBI_XOMSSA_EXPORT CFormicAcid: public CCleave {
914 public:
915     CFormicAcid(void);
916 };
917 
918 
919 class NCBI_XOMSSA_EXPORT CTrypsin: public CCleave {
920 public:
921     CTrypsin(void);
922 };
923 
924 
925 class NCBI_XOMSSA_EXPORT CArgC: public CCleave {
926 public:
927     CArgC(void);
928 };
929 
930 
931 class NCBI_XOMSSA_EXPORT CChymotrypsin: public CCleave {
932 public:
933     CChymotrypsin(void);
934 };
935 
936 
937 class NCBI_XOMSSA_EXPORT CLysC: public CCleave {
938 public:
939     CLysC(void);
940 };
941 
942 
943 class NCBI_XOMSSA_EXPORT CLysCP: public CCleave {
944 public:
945     CLysCP(void);
946 };
947 
948 
949 class NCBI_XOMSSA_EXPORT CPepsinA: public CCleave {
950 public:
951     CPepsinA(void);
952 };
953 
954 
955 class NCBI_XOMSSA_EXPORT CTrypCNBr: public CCleave {
956 public:
957     CTrypCNBr(void);
958 };
959 
960 
961 class NCBI_XOMSSA_EXPORT CTrypChymo: public CCleave {
962 public:
963     CTrypChymo(void);
964 };
965 
966 
967 class NCBI_XOMSSA_EXPORT CTrypsinP: public CCleave {
968 public:
969     CTrypsinP(void);
970 };
971 
972 
973 //! whole protein (no cleavage)
974 class NCBI_XOMSSA_EXPORT CWholeProtein: public CCleave {
975 public:
976     CWholeProtein(void);
977 };
978 
979 
980 //! Asp-N, Nterm of D
981 class NCBI_XOMSSA_EXPORT CAspN: public CCleave {
982 public:
983     CAspN(void);
984 };
985 
986 
987 //! Glu-C, Cterm of E
988 class NCBI_XOMSSA_EXPORT CGluC: public CCleave {
989 public:
990     CGluC(void);
991 };
992 
993 //! Glu-C and Asp-N
994 class NCBI_XOMSSA_EXPORT CGluCAspN: public CCleave {
995 public:
996     CGluCAspN(void);
997 };
998 
999 
1000 /**
1001  * eMSEnzymes_top_down
1002  * top-down search of ETD spectra
1003  *
1004  */
1005 
1006 class NCBI_XOMSSA_EXPORT CTopDown: public CCleave {
1007 public:
1008     CTopDown(void);
1009 };
1010 
1011 
1012 /**
1013  * eMSEnzymes_semi_tryptic
1014  * semi tryptic search (one end of peptide has to be tryptic)
1015  *
1016  */
1017 
1018 class NCBI_XOMSSA_EXPORT CSemiTryptic: public CCleave {
1019 public:
1020     CSemiTryptic(void);
1021 };
1022 
1023 
1024 /**
1025  * eMSEnzymes_no_enzyme
1026  * search without enzyme (precursor mass only)
1027  *
1028  */
1029 
1030 class NCBI_XOMSSA_EXPORT CNoEnzyme: public CCleave {
1031 public:
1032     CNoEnzyme(void);
1033 };
1034 
1035 
1036 /**
1037  * eMSEnzymes_chymotrypsin_p
1038  * chymotrypsin without proline rule
1039  *
1040  */
1041 
1042 class NCBI_XOMSSA_EXPORT CChymoP: public CCleave {
1043 public:
1044     CChymoP(void);
1045 };
1046 
1047 /**
1048  * eMSEnzymes_aspn_de
1049  * Asp-N that cuts at D and E
1050  *
1051  */
1052 
1053 class NCBI_XOMSSA_EXPORT CAspNDE: public CCleave {
1054 public:
1055     CAspNDE(void);
1056 };
1057 
1058 
1059 /**
1060  * eMSEnzymes_gluc_de
1061  * Glu-C that cuts at D and E
1062  *
1063  */
1064 
1065 class NCBI_XOMSSA_EXPORT CGluCDE: public CCleave {
1066 public:
1067     CGluCDE(void);
1068 };
1069 
1070 
1071 
1072 /**
1073  * eMSEnzymes_lysn
1074  * Cuts N term of lysine
1075  *
1076  */
1077 
1078 class NCBI_XOMSSA_EXPORT CLysN: public CCleave {
1079 public:
1080     CLysN(void);
1081 };
1082 
1083 /**
1084  * eMSEnzymes_thermolysin_p
1085  * cuts N terminal at A,F,I,L,M or V
1086  *
1087  */
1088 
1089 class NCBI_XOMSSA_EXPORT CThermolysinP: public CCleave {
1090 public:
1091     CThermolysinP(void);
1092 };
1093 
1094 /**
1095  *
1096  * Chymotrypsin, cuts N terminal at A,F,I,L,M or V
1097  *
1098  */
1099 
1100 class NCBI_XOMSSA_EXPORT CSemiChymotrypsin: public CCleave {
1101 public:
1102     CSemiChymotrypsin(void);
1103 };
1104 
1105 /**
1106  *
1107  * Glu-C, Cterm of E
1108  *
1109  */
1110 
1111 class NCBI_XOMSSA_EXPORT CSemiGluC: public CCleave {
1112 public:
1113     CSemiGluC(void);
1114 };
1115 
1116 
1117 
1118 ///
1119 /// factory to return back object for enzyme
1120 ///
1121 
1122 class NCBI_XOMSSA_EXPORT CCleaveFactory
1123 {
1124 public:
1125   static CRef <CCleave> CleaveFactory(const EMSEnzymes enzyme);
1126 
1127 };
1128 
1129 
1130 END_SCOPE(omssa)
1131 END_SCOPE(objects)
1132 END_NCBI_SCOPE
1133 
1134 #endif
1135