1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.io; 22 23 import jalview.datamodel.SequenceI; 24 25 public class ModellerDescription 26 { 27 /** 28 * Translates between a String containing a set of colon-separated values on a 29 * single line, and sequence start/end and other properties. See PIRFile IO 30 * for its use. 31 */ 32 final String[] seqTypes = { "sequence", "structure", "structureX", 33 "structureN" }; 34 35 final String[] Fields = { "objectType", "objectId", "startField", 36 "startCode", "endField", "endCode", "description1", "description2", 37 "resolutionField", "tailField" }; 38 39 final int TYPE = 0; 40 41 final int LOCALID = 1; 42 43 final int START = 2; 44 45 final int START_CHAIN = 3; 46 47 final int END = 4; 48 49 final int END_CHAIN = 5; 50 51 final int DESCRIPTION1 = 6; 52 53 final int DESCRIPTION2 = 7; 54 55 final int RESOLUTION = 8; 56 57 final int TAIL = 9; 58 59 /** 60 * 0 is free text or empty 1 is something that parses to an integer, or \@ 61 */ 62 final int Types[] = { 0, 0, 1, 0, 1, 0, 0, 0, 0, 0 }; 63 64 final char Padding[] = { ' ', ' ', ' ', '.', ' ', '.', '.', '.', '.', 65 '.' }; 66 67 java.util.Hashtable fields = new java.util.Hashtable(); 68 ModellerDescription()69 ModellerDescription() 70 { 71 fields.put(Fields[TAIL], ""); 72 } 73 74 class resCode 75 { 76 Integer val; 77 78 String field; 79 resCode(String f, Integer v)80 resCode(String f, Integer v) 81 { 82 val = v; 83 field = f; 84 } 85 resCode(int v)86 resCode(int v) 87 { 88 val = Integer.valueOf(v); 89 field = val.toString(); 90 } 91 }; 92 validResidueCode(String field)93 private resCode validResidueCode(String field) 94 { 95 Integer val = null; 96 com.stevesoft.pat.Regex r = new com.stevesoft.pat.Regex( 97 "\\s*((([-0-9]+).?)|FIRST|LAST|@)"); 98 99 if (!r.search(field)) 100 { 101 return null; // invalid 102 } 103 String value = r.stringMatched(3); 104 if (value == null) 105 { 106 value = r.stringMatched(1); 107 } 108 // jalview.bin.Cache.log.debug("from '" + field + "' matched '" + value + 109 // "'"); 110 try 111 { 112 val = Integer.valueOf(value); 113 return new resCode(field, val); // successful numeric extraction 114 } catch (Exception e) 115 { 116 } 117 return new resCode(field, null); 118 } 119 parseDescription(String desc)120 private java.util.Hashtable parseDescription(String desc) 121 { 122 java.util.Hashtable fields = new java.util.Hashtable(); 123 java.util.StringTokenizer st = new java.util.StringTokenizer(desc, ":", 124 true); 125 126 String field; 127 int type = -1; 128 if (st.countTokens() > 0) 129 { 130 // parse colon-fields 131 int i = 0; 132 field = st.nextToken(":"); 133 do 134 { 135 if (seqTypes[i].equalsIgnoreCase(field)) 136 { 137 break; 138 } 139 } while (++i < seqTypes.length); 140 141 if (i < seqTypes.length) 142 { 143 st.nextToken(); // skip ':' 144 // valid seqType for modeller 145 type = i; 146 i = 1; // continue parsing fields 147 while (i < TAIL && st.hasMoreTokens()) 148 { 149 if ((field = st.nextToken(":")) != null) 150 { 151 if (!field.equals(":")) 152 { 153 // validate residue field value 154 if (Types[i] == 1) 155 { 156 resCode val = validResidueCode(field); 157 if (val != null) 158 { 159 fields.put(new String(Fields[i] + "num"), val); 160 } 161 else 162 { 163 // jalview.bin.Cache.log.debug( 164 // "Ignoring non-Modeller description: invalid integer-like 165 // field '" + field + "'"); 166 type = -1; /* invalid field! - throw the FieldSet away */ 167 } 168 ; 169 } 170 fields.put(Fields[i++], field); 171 if (st.hasMoreTokens()) 172 { 173 st.nextToken(); // skip token sep. 174 } 175 } 176 else 177 { 178 i++; 179 } 180 } 181 } 182 if (i == TAIL) 183 { 184 // slurp remaining fields 185 while (st.hasMoreTokens()) 186 { 187 String tl = st.nextToken(":"); 188 field += tl.equals(":") ? tl : (":" + tl); 189 } 190 fields.put(Fields[TAIL], field); 191 } 192 } 193 } 194 if (type == -1) 195 { 196 // object is not a proper ModellerPIR object 197 fields = new java.util.Hashtable(); 198 fields.put(Fields[TAIL], new String(desc)); 199 } 200 else 201 { 202 fields.put(Fields[TYPE], seqTypes[type]); 203 } 204 return fields; 205 } 206 ModellerDescription(String desc)207 ModellerDescription(String desc) 208 { 209 if (desc == null) 210 { 211 desc = ""; 212 } 213 fields = parseDescription(desc); 214 } 215 setStartCode(int v)216 void setStartCode(int v) 217 { 218 resCode r; 219 fields.put(Fields[START] + "num", r = new resCode(v)); 220 fields.put(Fields[START], r.field); 221 } 222 setEndCode(int v)223 void setEndCode(int v) 224 { 225 resCode r; 226 fields.put(Fields[END] + "num", r = new resCode(v)); 227 fields.put(Fields[END], r.field); 228 } 229 230 /** 231 * make a possibly updated modeller field line for the sequence object 232 * 233 * @param seq 234 * SequenceI 235 */ ModellerDescription(SequenceI seq)236 ModellerDescription(SequenceI seq) 237 { 238 239 if (seq.getDescription() != null) 240 { 241 fields = parseDescription(seq.getDescription()); 242 } 243 244 if (isModellerFieldset()) 245 { 246 // Set start and end before we update the type (in the case of a 247 // synthesized field set) 248 if (getStartCode() == null || (getStartNum() != seq.getStart() 249 && getStartCode().val != null)) 250 { 251 // unset or user updated sequence start position 252 setStartCode(seq.getStart()); 253 } 254 255 if (getEndCode() == null || (getEndNum() != seq.getEnd() 256 && getStartCode() != null && getStartCode().val != null)) 257 { 258 setEndCode(seq.getEnd()); 259 } 260 } 261 else 262 { 263 // synthesize fields 264 setStartCode(seq.getStart()); 265 setEndCode(seq.getEnd()); 266 fields.put(Fields[LOCALID], seq.getName()); // this may be overwritten 267 // below... 268 // type - decide based on evidence of PDB database references - this also 269 // sets the local reference field 270 int t = 0; // sequence 271 if (seq.getDatasetSequence() != null 272 && seq.getDatasetSequence().getDBRefs() != null) 273 { 274 jalview.datamodel.DBRefEntry[] dbr = seq.getDatasetSequence() 275 .getDBRefs(); 276 int i, j; 277 for (i = 0, j = dbr.length; i < j; i++) 278 { 279 if (dbr[i] != null) 280 { 281 // JBPNote PDB dbRefEntry needs properties to propagate onto 282 // ModellerField 283 // JBPNote Need to get info from the user about whether the sequence 284 // is the one being modelled, or if it is a template. 285 if (dbr[i].getSource() 286 .equals(jalview.datamodel.DBRefSource.PDB)) 287 { 288 fields.put(Fields[LOCALID], dbr[i].getAccessionId()); 289 t = 2; 290 break; 291 } 292 } 293 } 294 } 295 fields.put(Fields[TYPE], seqTypes[t]); 296 } 297 298 } 299 300 /** 301 * Indicate if fields parsed to a modeller-like colon-separated value line 302 * 303 * @return boolean 304 */ isModellerFieldset()305 boolean isModellerFieldset() 306 { 307 return (fields.containsKey(Fields[TYPE])); 308 } 309 getDescriptionLine()310 String getDescriptionLine() 311 { 312 String desc = ""; 313 int lastfield = Fields.length - 1; 314 315 if (isModellerFieldset()) 316 { 317 String value; 318 // try to write a minimal modeller field set, so.. 319 320 // find the last valid field in the entry 321 322 for (; lastfield > 6; lastfield--) 323 { 324 if (fields.containsKey(Fields[lastfield])) 325 { 326 break; 327 } 328 } 329 330 for (int i = 0; i < lastfield; i++) 331 { 332 value = (String) fields.get(Fields[i]); 333 if (value != null && value.length() > 0) 334 { 335 desc += ((String) fields.get(Fields[i])) + ":"; 336 } 337 else 338 { 339 desc += Padding[i] + ":"; 340 } 341 } 342 } 343 // just return the last field if no others were defined. 344 if (fields.containsKey(Fields[lastfield])) 345 { 346 desc += (String) fields.get(Fields[lastfield]); 347 } 348 else 349 { 350 desc += "."; 351 } 352 return desc; 353 } 354 getStartNum()355 int getStartNum() 356 { 357 int start = 0; 358 resCode val = getStartCode(); 359 if (val != null && val.val != null) 360 { 361 return val.val.intValue(); 362 } 363 return start; 364 } 365 getStartCode()366 resCode getStartCode() 367 { 368 if (isModellerFieldset() && fields.containsKey(Fields[START] + "num")) 369 { 370 return (resCode) fields.get(Fields[START] + "num"); 371 } 372 return null; 373 } 374 getEndCode()375 resCode getEndCode() 376 { 377 if (isModellerFieldset() && fields.containsKey(Fields[END] + "num")) 378 { 379 return (resCode) fields.get(Fields[END] + "num"); 380 } 381 return null; 382 } 383 getEndNum()384 int getEndNum() 385 { 386 int end = 0; 387 resCode val = getEndCode(); 388 if (val != null && val.val != null) 389 { 390 return val.val.intValue(); 391 } 392 return end; 393 } 394 395 /** 396 * returns true if sequence object was modifed with a valid modellerField set 397 * 398 * @param newSeq 399 * SequenceI 400 * @return boolean 401 */ updateSequenceI(SequenceI newSeq)402 boolean updateSequenceI(SequenceI newSeq) 403 { 404 if (isModellerFieldset()) 405 { 406 resCode rc = getStartCode(); 407 if (rc != null && rc.val != null) 408 { 409 newSeq.setStart(getStartNum()); 410 } 411 else 412 { 413 newSeq.setStart(1); 414 } 415 rc = getEndCode(); 416 if (rc != null && rc.val != null) 417 { 418 newSeq.setEnd(getEndNum()); 419 } 420 else 421 { 422 newSeq.setEnd(newSeq.getStart() + newSeq.getLength()); 423 } 424 return true; 425 } 426 return false; 427 } 428 } 429