1 /**
2  * Author: Mark Larkin
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  *
6  * Changes:
7  *
8  * 13-02-07,Nigel Brown(EMBL): Increased maximum sequence identifier
9  * width MAXNAMES from 30 to 150.
10  * 20-12-07, Paul McGettigan: bug #53 change MAXNAMESTODISPLAY back to 10 from 30
11  */
12 
13 #ifndef CLUSTALW_H
14 #define CLUSTALW_H
15 /* Mark tidy up Nov 2005 */
16 /*********************CLUSTALW.H*********************************************/
17 /****************************************************************************/
18 
19 /*
20  *** AW NOT NEEDED ANYMORE since 2.0.9
21  *** done via including config.h(clustalw) or DEFINE(clustalx)
22  * Main header file for ClustalW.  Uncomment ONE of the following 4 lines
23  * depending on which compiler you wish to use.
24  */
25 /* NOT SUPPORTED #define VMS 1                 VAX or ALPHA VMS */
26 /* Think_C for Macintosh */
//#define MAC 1
28 /* Turbo C for PC's */
29 // #define WINDOWS 1
30 /* unix */
31 //#define UNIX 1
32 // d
33 
34 
/* Debug switches, both off in this header.
 * DEBUGFULL is a preprocessor macro (so it can also be tested with #if);
 * DEBUGLOG is a typed constant declared outside the clustalw namespace. */
#define DEBUGFULL 0
const bool DEBUGLOG = false;
37 /***************************************************************************/
38 /***************************************************************************/
39 
40 
41 #include "general.h"
42 #include "Array2D.h"
43 #include "SquareMat.h"
44 #include "SymMatrix.h"
45 #include <vector>
46 #include <string>
47 using namespace std;
48 namespace clustalw
49 {
50 
51 typedef SymMatrix DistMatrix;
52 typedef std::vector<vector <int> > TreeGroups;
53 
/* Bundle of five name strings, one per tree-related output kind
 * (phylip, clustal, distance, nexus and pim, going by the member names).
 * Plain aggregate: every member starts out as an empty string. */
struct TreeNames
{
    std::string phylipName;
    std::string clustalName;
    std::string distName;
    std::string nexusName;
    std::string pimName;
};
62 
/* Collection of file-name strings: two tree files followed by one entry per
 * alignment output format. Plain aggregate; members default to empty. */
struct AlignmentFileNames
{
    // Tree files.
    std::string treeFile;
    std::string profile2TreeFile;

    // One name per output format.
    std::string clustalFile;
    std::string nrbfFile;
    std::string gcgFile;
    std::string phylipFile;
    std::string gdeFile;
    std::string nexusFile;
    std::string fastaFile;
};
75 
/* Node of the phylogenetic tree structure.
 * The struct has no constructor, so the pointers and numeric members are
 * left uninitialised unless the creator sets them; only `name` starts out
 * as an empty string. */
struct TreeNode
{
    // Links to the neighbouring nodes.
    TreeNode* left;
    TreeNode* right;
    TreeNode* parent;

    // Per-node data.
    float dist;
    int leaf;
    int order;
    std::string name;
};
87 
88 struct PhyloTree
89 {
90     TreeGroups treeDesc;
91     vector<double> leftBranch;
92     vector<double> rightBranch;
93 };
/* Three integers describing a run of sequences: first, last and count.
 * Member order matters for brace initialisation. */
struct SeqInfo
{
    int firstSeq;   // first sequence of the run
    int lastSeq;    // last sequence of the run
    int numSeqs;    // number of sequences
};
100 
101 struct LowScoreSegParams
102 {
103     int firstSeq;
104     int nSeqs;
105     int lastSeq;
106     int nCols;
107     vector<int>* seqWeight;
108     Array2D<int>* lowScoreRes;
109     bool seqWeightCalculated;
110 };
111 /* Global constants */
const int extraEndElemNum = 2;
const int ENDALN          = 127;

/* Status / error codes: all negative, spaced 100 apart. */
const int OK                      = -200;
const int CANNOTOPENFILE          = -300;
const int NOSEQUENCESINFILE       = -400;
const int OTHERERROR              = -500;
const int ALLNAMESNOTDIFFERENT    = -600;
const int MUSTREADINPROFILE1FIRST = -700;
const int EMPTYSEQUENCE           = -800;
const int SEQUENCETOOBIG          = -900;
const int BADFORMAT               = -1000;
123 
/* AA* selectors: BLOSUM, PAM, Gonnet, identity, user-defined. */
const int AABLOSUM      = 0;
const int AAPAM         = 1;
const int AAGONNET      = 2;
const int AAIDENTITY    = 3;
const int AAUSERDEFINED = 4;

/* PWAA* selectors: same numbering as the AA* group. */
const int PWAABLOSUM   = 0;
const int PWAAPAM      = 1;
const int PWAAGONNET   = 2;
const int PWAAIDENTITY = 3;
const int PWAAUSER     = 4;

/* DNA* selectors. */
const int DNAIUB         = 0;
const int DNACLUSTALW    = 1;
const int DNAUSERDEFINED = 2;

/* AAHIST* selectors: identity, four Gonnet PAM variants, user. */
const int AAHISTIDENTITY     = 0;
const int AAHISTGONNETPAM80  = 1;
const int AAHISTGONNETPAM120 = 2;
const int AAHISTGONNETPAM250 = 3;
const int AAHISTGONNETPAM350 = 4;
const int AAHISTUSER         = 5;

/* QTAASEG* selectors: four Gonnet PAM variants, user. */
const int QTAASEGGONNETPAM80  = 0;
const int QTAASEGGONNETPAM120 = 1;
const int QTAASEGGONNETPAM250 = 2;
const int QTAASEGGONNETPAM350 = 3;
const int QTAASEGUSER         = 4;
152 
/* Only 9 hyd residue choices are allowed. */
const int MAXHYDRESIDUES = 9;

/* Two-valued selectors. */
const int Protein       = 0;
const int DNA           = 1;
const int Pairwise      = 0;
const int MultipleAlign = 1;

/* OUT* selectors. */
const int OUTSECST = 0;
const int OUTGAP   = 1;
const int OUTBOTH  = 2;
const int OUTNONE  = 3;
163 
/* Max chars read for seq. names (nige: was 30). */
const int MAXNAMES = 150;

/* Name widths used for printout. History of MAXNAMESTODISPLAY:
 *   30 - Mark 18-7-07
 *   10 - Bug #53, Paul 20-12-07
 *   30 with a separate minimum of 10 - Paul 9-2-08, to replicate 1.83
 *        behaviour (current values)
 */
const int MAXNAMESTODISPLAY = 30;
const int MINNAMESTODISPLAY = 10;

/* Title length */
const int MAXTITLES = 60;

/* Max. file name length */
const int FILENAMELEN = 256;
171 
/* Format identifiers, numbered 0..11. The names in the trailing comments are
 * the attributions carried over from the original declarations. */
const int UNKNOWN   = 0;
const int EMBLSWISS = 1;
const int PIR       = 2;
const int PEARSON   = 3;
const int GDE       = 4;
const int CLUSTAL   = 5;   /* DES */
const int MSF       = 6;   /* DES */
const int RSF       = 7;   /* JULIE */
const int USER      = 8;   /* DES */
const int PHYLIP    = 9;   /* DES */
const int NEXUS     = 10;  /* DES */
const int FASTA     = 11;  /* Ramu */
184 
/* NONE / SECST / GMASK selector. */
const int NONE  = 0;
const int SECST = 1;
const int GMASK = 2;

/* PROFILE / SEQUENCE selector. */
const int PROFILE  = 0;
const int SEQUENCE = 1;

/* BS_* label selectors (note: node labels = 2, branch labels = 1). */
const int BS_NODE_LABELS   = 2;
const int BS_BRANCH_LABELS = 1;

/* Number of lines of help sent to screen */
const int PAGE_LEN = 22;

/* Output layout. */
const int PAGEWIDTH      = 80;  /* maximum characters on output file page */
const int LINELENGTH     = 60;  /* Output file line length */
const int GCG_LINELENGTH = 50;
200 
/* NJ / UPGMA selector. */
const int NJ    = 1;
const int UPGMA = 2;

/* ALIGNMENT / TREE selector. */
const int ALIGNMENT = 1;
const int TREE      = 2;

const int MinIdentifier = 1;

/* The two characters '-' and '/' held as one string. */
const std::string VALID_COMMAND_SEP = "-/";
210 
/*
 * Platform-dependent settings.
 *
 *   default_commandsep  default command separator character
 *   DIRDELIM            directory delimiter character
 *   INT_SCALE_FACTOR    scaling factor to convert float to integer for
 *                       profile scores
 *
 * The first three branches are unchanged: they honour OS_MAC, OS_WINDOWS and
 * OS_UNIX when the build supplies them. Note that OS_MAC is tested with
 * #ifdef (any definition selects it) while OS_WINDOWS and OS_UNIX are tested
 * by value (they must expand to a non-zero expression).
 *
 * Without the extra branches, a build that sets none of the three macros
 * takes no branch at all and the three constants are never declared, which
 * only shows up as "undeclared identifier" errors wherever they are used.
 * The extra branches pick a platform from compiler-predefined macros in that
 * case.
 * NOTE(review): mapping __APPLE__ to the OS_MAC values assumes OS_MAC means
 * macOS - confirm against the build configuration.
 */
#ifdef OS_MAC
    const char default_commandsep = '-';
    const char DIRDELIM = '/';
    const int INT_SCALE_FACTOR = 100;

#elif OS_WINDOWS
    const char default_commandsep = '/';
    const char DIRDELIM = '\\';
    const int INT_SCALE_FACTOR = 100;

#elif OS_UNIX
    const char default_commandsep = '-';
    const char DIRDELIM = '/';
    const int INT_SCALE_FACTOR = 1000;

#elif defined(_WIN32)
    /* Fallback: no OS_* macro set, Windows compiler detected. */
    const char default_commandsep = '/';
    const char DIRDELIM = '\\';
    const int INT_SCALE_FACTOR = 100;

#elif defined(__APPLE__)
    /* Fallback: no OS_* macro set, Apple compiler detected. */
    const char default_commandsep = '-';
    const char DIRDELIM = '/';
    const int INT_SCALE_FACTOR = 100;

#else
    /* Fallback: no OS_* macro set, treat as unix. */
    const char default_commandsep = '-';
    const char DIRDELIM = '/';
    const int INT_SCALE_FACTOR = 1000;
#endif
229 
230 
/* max size of comparison matrix */
const int NUMRES = 32;

/* INPUT / ALIGNED selector. */
const int INPUT   = 0;
const int ALIGNED = 1;

/* LEFT / RIGHT selector. */
const int LEFT  = 1;
const int RIGHT = 2;

/* NODE / LEAF selector. */
const int NODE = 0;
const int LEAF = 1;

/* Profile column positions. As written, GAPCOL equals NUMRES and LENCOL
 * equals NUMRES + 1, but the values are literals, not derived from NUMRES.
 * NOTE(review): presumably these must move if NUMRES changes - confirm. */
const int GAPCOL = 32;  /* position of gap open penalty in profile */
const int LENCOL = 33;  /* position of gap extension penalty in profile */
243 
/* Holds values for the pairwise scales: two float gap scales and an
 * integer scale. Member order matters for brace initialisation. */
struct PairScaleValues
{
    float gapOpenScale;
    float gapExtendScale;
    int intScale;
};
251 
/* Pair of float scale values.
 * NOTE(review): intScale is a float here, whereas PairScaleValues::intScale
 * is an int - confirm the difference is intended. */
struct PrfScaleValues
{
    float scale;
    float intScale;
};
257 
/* Phylogenetic tree structure with a fixed-size name buffer (64 chars,
 * including any terminator). Members are not initialised by the struct. */
struct node
{
    node* left;
    node* right;
    node* parent;
    float dist;
    int leaf;
    int order;
    char name[64];
};

/* Aliases: stree is the node type itself, treeptr a pointer to it. */
typedef node stree;
typedef node* treeptr;
269 
/* One matrix-menu entry: two fixed 30-char buffers. */
struct MatMenuEntry
{
    char title[30];
    char string[30];
};
275 
276 typedef struct
277 {
278     int noptions;
279     MatMenuEntry opt[10];
280 } MatMenu;
281 
/* Array length of UserMatrixSeries::mat. */
const int MAXMAT = 10;
283 
/* One matrix of a series: a lower and upper limit plus two raw pointers to
 * vectors of short. This header does not show who owns the vectors. */
struct SeriesMat
{
    int llimit;
    int ulimit;
    std::vector<short>* matptr;
    std::vector<short>* AAXref;
};
291 
292 /*
293  * UserMatSeries holds the number of matrices in the series and
294  */
295 typedef struct
296 {
297     int nmat;
298     SeriesMat mat[MAXMAT];
299 } UserMatrixSeries;
300 
301 }
302 #endif
303 
304