1 /** 2 * Author: Mark Larkin 3 * 4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. 5 * 6 * Changes: 7 * 8 * 13-02-07,Nigel Brown(EMBL): Increased maximum sequence identifier 9 * width MAXNAMES from 30 to 150. 10 * 20-12-07, Paul McGettigan: bug #53 change MAXNAMESTODISPLAY back to 10 from 30 11 */ 12 13 #ifndef CLUSTALW_H 14 #define CLUSTALW_H 15 /* Mark tidy up Nov 2005 */ 16 /*********************CLUSTALW.H*********************************************/ 17 /****************************************************************************/ 18 19 /* 20 *** AW NOT NEEDED ANYMORE since 2.0.9 21 *** done via including config.h(clustalw) or DEFINE(clustalx) 22 * Main header file for ClustalW. Uncomment ONE of the following 4 lines 23 * depending on which compiler you wish to use. 24 */ 25 /* NOT SUPPORTED #define VMS 1 VAX or ALPHA VMS */ 26 /* Think_C for Macintosh */ 27 //#define MAC 1 */ 28 /* Turbo C for PC's */ 29 // #define WINDOWS 1 30 /* unix */ 31 //#define UNIX 1 32 // d 33 34 35 #define DEBUGFULL 0 36 const bool DEBUGLOG = false; 37 /***************************************************************************/ 38 /***************************************************************************/ 39 40 41 #include "general.h" 42 #include "Array2D.h" 43 #include "SquareMat.h" 44 #include "SymMatrix.h" 45 #include <vector> 46 #include <string> 47 using namespace std; 48 namespace clustalw 49 { 50 51 typedef SymMatrix DistMatrix; 52 typedef std::vector<vector <int> > TreeGroups; 53 54 struct TreeNames 55 { 56 string phylipName; 57 string clustalName; 58 string distName; 59 string nexusName; 60 string pimName; 61 }; 62 63 struct AlignmentFileNames 64 { 65 string treeFile; 66 string profile2TreeFile; 67 string clustalFile; 68 string nrbfFile; 69 string gcgFile; 70 string phylipFile; 71 string gdeFile; 72 string nexusFile; 73 string fastaFile; 74 }; 75 76 struct TreeNode 77 { 78 // phylogenetic tree structure 79 struct TreeNode *left; 80 struct TreeNode *right; 81 struct TreeNode *parent; 82 float dist; 83 int leaf; 84 int order; 85 string name; 86 }; 87 88 struct PhyloTree 89 { 90 TreeGroups treeDesc; 91 vector<double> leftBranch; 92 vector<double> rightBranch; 93 }; 94 struct SeqInfo 95 { 96 int firstSeq; 97 int lastSeq; 98 int numSeqs; 99 }; 100 101 struct LowScoreSegParams 102 { 103 int firstSeq; 104 int nSeqs; 105 int lastSeq; 106 int nCols; 107 vector<int>* seqWeight; 108 Array2D<int>* lowScoreRes; 109 bool seqWeightCalculated; 110 }; 111 /* Global constants */ 112 const int extraEndElemNum = 2; 113 const int ENDALN = 127; 114 const int OK = -200; 115 const int CANNOTOPENFILE = -300; 116 const int NOSEQUENCESINFILE = -400; 117 const int OTHERERROR = -500; 118 const int ALLNAMESNOTDIFFERENT = -600; 119 const int MUSTREADINPROFILE1FIRST = -700; 120 const int EMPTYSEQUENCE = -800; 121 const int SEQUENCETOOBIG = -900; 122 const int BADFORMAT = -1000; 123 124 const int AABLOSUM = 0; 125 const int AAPAM = 1; 126 const int AAGONNET = 2; 127 const int AAIDENTITY = 3; 128 const int AAUSERDEFINED = 4; 129 130 const int PWAABLOSUM = 0; 131 const int PWAAPAM = 1; 132 const int PWAAGONNET = 2; 133 const int PWAAIDENTITY = 3; 134 const int PWAAUSER = 4; 135 136 const int DNAIUB = 0; 137 const int DNACLUSTALW = 1; 138 const int DNAUSERDEFINED = 2; 139 140 const int AAHISTIDENTITY = 0; 141 const int AAHISTGONNETPAM80 = 1; 142 const int AAHISTGONNETPAM120 = 2; 143 const int AAHISTGONNETPAM250 = 3; 144 const int AAHISTGONNETPAM350 = 4; 145 const int AAHISTUSER = 5; 146 147 const int QTAASEGGONNETPAM80 = 0; 148 const int QTAASEGGONNETPAM120 = 1; 149 const int QTAASEGGONNETPAM250 = 2; 150 const int QTAASEGGONNETPAM350 = 3; 151 const int QTAASEGUSER = 4; 152 153 const int MAXHYDRESIDUES = 9; // Only allowing 9 hyd residue choices 154 const int Protein = 0; 155 const int DNA = 1; 156 const int Pairwise = 0; 157 const int MultipleAlign = 1; 158 159 const int OUTSECST = 0; 160 const int OUTGAP = 1; 161 const int OUTBOTH = 2; 162 const int OUTNONE = 3; 163 164 const int MAXNAMES = 150; /* Max chars read for seq. names */ //nige, was 30 165 //const int MAXNAMESTODISPLAY = 30; // Used for printout. Mark 18-7-07 166 //const int MAXNAMESTODISPLAY = 10; // Bug #53. Paul 20-12-07 167 const int MAXNAMESTODISPLAY = 30; //Paul replicate 1.83 behavour 9-2-08 168 const int MINNAMESTODISPLAY = 10; //Paul replicate 1.83 behavour 9-2-08 169 const int MAXTITLES = 60; /* Title length */ 170 const int FILENAMELEN = 256; /* Max. file name length */ 171 172 const int UNKNOWN = 0; 173 const int EMBLSWISS = 1; 174 const int PIR = 2; 175 const int PEARSON = 3; 176 const int GDE = 4; 177 const int CLUSTAL = 5; /* DES */ 178 const int MSF = 6; /* DES */ 179 const int RSF = 7; /* JULIE */ 180 const int USER = 8; /* DES */ 181 const int PHYLIP = 9; /* DES */ 182 const int NEXUS = 10; /* DES */ 183 const int FASTA = 11; /* Ramu */ 184 185 const int NONE = 0; 186 const int SECST = 1; 187 const int GMASK = 2; 188 189 const int PROFILE = 0; 190 const int SEQUENCE = 1; 191 192 const int BS_NODE_LABELS = 2; 193 const int BS_BRANCH_LABELS = 1; 194 195 const int PAGE_LEN = 22; /* Number of lines of help sent to screen */ 196 197 const int PAGEWIDTH = 80; /* maximum characters on output file page */ 198 const int LINELENGTH = 60; /* Output file line length */ 199 const int GCG_LINELENGTH = 50; 200 201 const int NJ = 1; 202 const int UPGMA = 2; 203 204 const int ALIGNMENT = 1; 205 const int TREE = 2; 206 207 const int MinIdentifier = 1; 208 209 const string VALID_COMMAND_SEP = "-/"; 210 211 #ifdef OS_MAC 212 const char default_commandsep = '-'; 213 const char DIRDELIM = '/'; 214 const int INT_SCALE_FACTOR = 100; /* Scaling factor to convert float to integer 215 for profile scores */ 216 217 #elif OS_WINDOWS 218 const char default_commandsep = '/'; 219 const char DIRDELIM = '\\'; 220 const int INT_SCALE_FACTOR = 100; /* Scaling factor to convert float to integer 221 for profile scores */ 222 223 #elif OS_UNIX 224 const char default_commandsep = '-'; 225 const char DIRDELIM = '/'; 226 const int INT_SCALE_FACTOR = 1000; /* Scaling factor to convert float to integer 227 for profile scores */ 228 #endif 229 230 231 const int NUMRES = 32; /* max size of comparison matrix */ 232 const int INPUT = 0; 233 const int ALIGNED = 1; 234 235 const int LEFT = 1; 236 const int RIGHT = 2; 237 238 const int NODE = 0; 239 const int LEAF = 1; 240 241 const int GAPCOL = 32; /* position of gap open penalty in profile */ 242 const int LENCOL = 33; /* position of gap extension penalty in profile */ 243 244 typedef struct 245 { 246 /* Holds values for the pairwise scales */ 247 float gapOpenScale; 248 float gapExtendScale; 249 int intScale; 250 }PairScaleValues; 251 252 typedef struct 253 { 254 float scale; 255 float intScale; 256 }PrfScaleValues; 257 258 typedef struct node 259 { 260 /* phylogenetic tree structure */ 261 struct node *left; 262 struct node *right; 263 struct node *parent; 264 float dist; 265 int leaf; 266 int order; 267 char name[64]; 268 } stree, *treeptr; 269 270 typedef struct 271 { 272 char title[30]; 273 char string[30]; 274 } MatMenuEntry; 275 276 typedef struct 277 { 278 int noptions; 279 MatMenuEntry opt[10]; 280 } MatMenu; 281 282 const int MAXMAT = 10; 283 284 typedef struct 285 { 286 int llimit; 287 int ulimit; 288 vector<short>* matptr; 289 vector<short>* AAXref; 290 } SeriesMat; 291 292 /* 293 * UserMatSeries holds the number of matrices in the series and 294 */ 295 typedef struct 296 { 297 int nmat; 298 SeriesMat mat[MAXMAT]; 299 } UserMatrixSeries; 300 301 } 302 #endif 303 304