/*
 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4)
 * Copyright (C) 2021 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.ws.seqfetcher;

import jalview.api.FeatureSettingsModelI;
import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.SequenceI;
import jalview.util.DBRefUtils;
import jalview.util.MessageManager;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.Vector;

/**
 * Base class holding a registry of {@link DbSourceProxy} objects, keyed first
 * by database source and then by database name, together with methods to look
 * proxies up and to retrieve sequences through them.
 * <p>
 * The registry ({@link #fetchableDbs}) is created lazily by
 * {@link #addDbRefSourceImpl(DbSourceProxy)}; until a proxy has been
 * registered it is null, and the query methods of this class treat that state
 * as "no databases known".
 */
public class ASequenceFetcher
{

  /*
   * set of databases we can retrieve entries from: 
   * database source -> (database name -> proxy); null until the first
   * proxy is registered
   */
  protected Hashtable<String, Map<String, DbSourceProxy>> fetchableDbs;

  /*
   * comparator to sort by tier (0/1/2) and name
   */
  private final Comparator<DbSourceProxy> proxyComparator;

  /**
   * Constructor. Sets up the comparator used to order the proxies of one
   * database source; no proxies are registered here.
   */
  protected ASequenceFetcher()
  {
    super();

    /*
     * comparator to sort proxies by tier and name
     */
    proxyComparator = new Comparator<DbSourceProxy>()
    {
      @Override
      public int compare(DbSourceProxy o1, DbSourceProxy o2)
      {
        /*
         * Tier 0 precedes 1 precedes 2
         */
        int compared = Integer.compare(o1.getTier(), o2.getTier());
        if (compared == 0)
        {
          // defend against NullPointer - should never happen
          String o1Name = o1.getDbName();
          String o2Name = o2.getDbName();
          if (o1Name != null && o2Name != null)
          {
            compared = o1Name.compareToIgnoreCase(o2Name);
          }
        }
        return compared;
      }
    };
  }

  /**
   * get array of supported Databases
   * 
   * @return database source string for each database - only the latest version
   *         of a source db is bound to each source. Returns null (not an empty
   *         array) if no proxy has been registered yet.
   */
  public String[] getSupportedDb()
  {
    if (fetchableDbs == null)
    {
      return null;
    }
    return fetchableDbs.keySet().toArray(new String[fetchableDbs.size()]);
  }

  /**
   * Answers true if a proxy has been registered for the given database source
   * (compared ignoring case). A warning is logged when the answer is false.
   * 
   * @param source
   *          database source string; null is never fetchable
   * @return true if the source is a key of the registry
   */
  public boolean isFetchable(String source)
  {
    if (fetchableDbs != null)
    {
      for (String db : fetchableDbs.keySet())
      {
        // receiver is the (non-null) registry key so a null source is safe
        if (db.equalsIgnoreCase(source))
        {
          return true;
        }
      }
    }
    Cache.log.warn("isFetchable doesn't know about '" + source + "'");
    return false;
  }

  /**
   * Fetch sequences for the given cross-references.
   * <p>
   * References are grouped by canonical source name; for each source, every
   * registered proxy of the requested molecule type is tried in tier/name
   * order, and accessions not matched by one proxy are offered to the next.
   * Accessions are sent in batches no larger than the proxy's
   * {@code getMaximumQueryCount()} (a single accession per request when that
   * value is 1 or less).
   * 
   * @param refs
   *          the cross-references to resolve; null or empty gives null
   * @param dna
   *          if true, only fetch from nucleotide data sources, else peptide
   * @return the retrieved sequences, or null if none were retrieved
   */
  public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
  {
    if (refs == null || refs.isEmpty())
    {
      return null;
    }

    List<SequenceI> retrieved = new ArrayList<>();
    Hashtable<String, List<String>> queries = groupAccessionsBySource(refs);

    for (Map.Entry<String, List<String>> entry : queries.entrySet())
    {
      String db = entry.getKey();
      List<String> accessions = entry.getValue();
      if (!isFetchable(db))
      {
        reportStdError(db, accessions, new Exception(
                "Don't know how to fetch from this database :" + db));
        continue;
      }

      /*
       * accessions still to be resolved for this source; shared between
       * the proxies so that misses from one are retried on the next
       */
      Stack<String> queriesLeft = new Stack<>();
      queriesLeft.addAll(accessions);

      for (DbSourceProxy fetcher : getSourceProxy(db))
      {
        fetchFromProxy(fetcher, db, dna, queriesLeft, retrieved);
      }
    }

    if (retrieved.isEmpty())
    {
      return null;
    }
    SequenceI[] result = retrieved
            .toArray(new SequenceI[retrieved.size()]);
    for (SequenceI seq : result)
    {
      seq.updatePDBIds();
    }
    return result;
  }

  /**
   * Groups the distinct accession ids of the given references by the canonical
   * name of their source, preserving first-seen order within each group.
   * 
   * @param refs
   *          the cross-references (not null)
   * @return map of canonical source name to accession ids
   */
  private Hashtable<String, List<String>> groupAccessionsBySource(
          List<DBRefEntry> refs)
  {
    // Hashtable retained deliberately: its iteration order determines the
    // order in which sources are queried and hence the order of results
    Hashtable<String, List<String>> queries = new Hashtable<>();
    for (DBRefEntry ref : refs)
    {
      String canonical = DBRefUtils.getCanonicalName(ref.getSource());
      List<String> accessions = queries.get(canonical);
      if (accessions == null)
      {
        accessions = new ArrayList<>();
        queries.put(canonical, accessions);
      }
      String accession = ref.getAccessionId();
      if (!accessions.contains(accession))
      {
        accessions.add(accession);
      }
    }
    return queries;
  }

  /**
   * Drains {@code queriesLeft} through a single proxy, adding any sequences
   * returned to {@code retrieved}. Accessions that were sent but not found in
   * the database references of any returned sequence are pushed back onto
   * {@code queriesLeft} so that a subsequent proxy can try them. Proxies of the
   * wrong molecule type are skipped without consuming any accession.
   * 
   * @param fetcher
   *          the proxy to query
   * @param db
   *          canonical source name (used for matching references and messages)
   * @param dna
   *          molecule type wanted
   * @param queriesLeft
   *          accessions still to resolve (modified)
   * @param retrieved
   *          accumulator for retrieved sequences (modified)
   */
  private void fetchFromProxy(DbSourceProxy fetcher, String db,
          boolean dna, Stack<String> queriesLeft,
          List<SequenceI> retrieved)
  {
    List<String> queriesMade = new ArrayList<>();
    try
    {
      if (fetcher.isDnaCoding() != dna)
      {
        return; // wrong sort of data
      }

      /*
       * never put more accessions in one request than the proxy says it
       * accepts; a count of 1 or less means one accession per request
       */
      int batchSize = Math.max(1, fetcher.getMaximumQueryCount());

      while (!queriesLeft.isEmpty())
      {
        StringBuilder qsb = new StringBuilder();
        int batched = 0;
        do
        {
          if (qsb.length() > 0)
          {
            qsb.append(fetcher.getAccessionSeparator());
          }
          String q = queriesLeft.pop();
          queriesMade.add(q);
          qsb.append(q);
          batched++;
        } while (batched < batchSize && !queriesLeft.isEmpty());
        String queryString = qsb.toString();

        AlignmentI seqset = null;
        try
        {
          // create a fetcher and go to it
          seqset = fetcher.getSequenceRecords(queryString);
        } catch (Exception ex)
        {
          // best effort: report, then carry on with the next batch
          System.err.println(
                  "Failed to retrieve the following from " + db);
          System.err.println(queryString);
          ex.printStackTrace(System.err);
        }
        // TODO: Merge alignment together - perhaps
        if (seqset == null)
        {
          continue;
        }

        SequenceI[] seqs = seqset.getSequencesArray();
        if (seqs != null)
        {
          collectSequences(seqs, db, queriesMade, retrieved);
        }
        else
        {
          printRawRecords(fetcher, db, queryString);
        }
      }
    } catch (Exception ex)
    {
      reportStdError(db, queriesMade, ex);
    }

    if (!queriesMade.isEmpty())
    {
      System.out.println("# Adding " + queriesMade.size()
              + " ids back to queries list for searching again (" + db
              + ")");
      queriesLeft.addAll(queriesMade);
    }
  }

  /**
   * Adds each (non-null) sequence to {@code retrieved} and removes from
   * {@code queriesMade} every accession that appears among the sequence's
   * database references matched against source {@code db}. Entries of
   * {@code seqs} are set to null as they are processed, as the original code
   * did.
   * 
   * @param seqs
   *          sequences returned by a proxy (entries are nulled)
   * @param db
   *          canonical source name to match references against
   * @param queriesMade
   *          accessions sent so far and not yet matched (modified)
   * @param retrieved
   *          accumulator for retrieved sequences (modified)
   */
  private void collectSequences(SequenceI[] seqs, String db,
          List<String> queriesMade, List<SequenceI> retrieved)
  {
    for (int is = 0; is < seqs.length; is++)
    {
      SequenceI seq = seqs[is];
      if (seq == null)
      {
        // nothing to add; avoids a later NullPointerException
        continue;
      }
      retrieved.add(seq);
      List<DBRefEntry> frefs = DBRefUtils.searchRefs(seq.getDBRefs(),
              new DBRefEntry(db, null, null));
      for (DBRefEntry dbr : frefs)
      {
        queriesMade.remove(dbr.getAccessionId());
      }
      seqs[is] = null;
    }
  }

  /**
   * Prints the proxy's raw records (if it has any) to standard output,
   * bracketed by header and footer lines naming the source and query.
   * 
   * @param fetcher
   *          the proxy just queried
   * @param db
   *          canonical source name
   * @param queryString
   *          the query that was sent
   */
  private void printRawRecords(DbSourceProxy fetcher, String db,
          String queryString)
  {
    StringBuffer rawRecords = fetcher.getRawRecords();
    if (rawRecords == null)
    {
      return;
    }
    System.out.println("# Retrieved from " + db + ":" + queryString);
    String hdr = "# " + db + ":" + queryString;
    System.out.println(hdr);
    System.out.println(rawRecords);
    System.out.println("# end of " + hdr);
  }

  /**
   * Writes a report of failed queries to standard error: a heading naming the
   * database, the queries separated by ';' (with periodic line breaks), and the
   * stack trace of the exception.
   * 
   * @param db
   *          the database source being queried
   * @param queriesMade
   *          the queries that failed; may be null
   * @param ex
   *          the cause; may be null
   */
  public void reportStdError(String db, List<String> queriesMade,
          Exception ex)
  {

    System.err.println(
            "Failed to retrieve the following references from " + db);
    if (queriesMade != null)
    {
      int n = 0;
      for (String qv : queriesMade)
      {
        System.err.print(" " + qv + ";");
        if (n++ > 10)
        {
          System.err.println();
          n = 0;
        }
      }
    }
    System.err.println();
    if (ex != null)
    {
      ex.printStackTrace();
    }
  }

  /**
   * Returns a list of proxies for the given source
   * 
   * @param db
   *          database source string TODO: add version string/wildcard for
   *          retrieval of specific DB source/version combinations.
   * @return a new list of DbSourceProxy for the db, sorted by tier then name;
   *         empty (never null) if the source is unknown or nothing has been
   *         registered
   */
  public List<DbSourceProxy> getSourceProxy(String db)
  {
    if (fetchableDbs == null)
    {
      return new ArrayList<>();
    }
    String canonical = DBRefUtils.getCanonicalName(db);
    Map<String, DbSourceProxy> dblist = fetchableDbs.get(canonical);
    if (dblist == null)
    {
      return new ArrayList<>();
    }

    /*
     * sort so that primary sources precede secondary
     */
    List<DbSourceProxy> dbs = new ArrayList<>(dblist.values());
    Collections.sort(dbs, proxyComparator);
    return dbs;
  }

  /**
   * constructs an instance of the proxy and registers it as a valid dbrefsource
   * 
   * @param dbSourceProxy
   *          reference for class implementing
   *          jalview.ws.seqfetcher.DbSourceProxy; it must have an accessible
   *          no-argument constructor
   * @throws IllegalArgumentException
   *           if one is raised while instantiating the class
   * @throws Error
   *           if instantiation fails for any other reason
   */
  protected void addDBRefSourceImpl(
          Class<? extends DbSourceProxy> dbSourceProxy)
          throws IllegalArgumentException
  {
    DbSourceProxy proxy = null;
    try
    {
      proxy = dbSourceProxy.getConstructor().newInstance();
    } catch (IllegalArgumentException e)
    {
      throw e;
    } catch (Exception e)
    {
      // Serious problems if this happens.
      throw new Error(MessageManager
              .getString("error.dbrefsource_implementation_exception"), e);
    }
    addDbRefSourceImpl(proxy);
  }

  /**
   * add the properly initialised DbSourceProxy object 'proxy' to the list of
   * sequence fetchers. The proxy is stored under its getDbSource() and, within
   * that, its getDbName(); a proxy already stored under the same pair is
   * replaced. A null proxy is ignored.
   * 
   * @param proxy
   */
  protected void addDbRefSourceImpl(DbSourceProxy proxy)
  {
    if (proxy == null)
    {
      return;
    }
    if (fetchableDbs == null)
    {
      fetchableDbs = new Hashtable<>();
    }
    String source = proxy.getDbSource();
    Map<String, DbSourceProxy> slist = fetchableDbs.get(source);
    if (slist == null)
    {
      slist = new Hashtable<>();
      fetchableDbs.put(source, slist);
    }
    slist.put(proxy.getDbName(), proxy);
  }

  /**
   * select sources which are implemented by instances of the given class
   * <p>
   * Note: a source name is added once for each matching proxy registered under
   * it, so the result may contain repeated names.
   * 
   * @param class1
   *          class that implements DbSourceProxy
   * @return null (if nothing is registered or nothing matches) or array of
   *         source names for fetchers
   * @throws Error
   *           if class1 does not implement DbSourceProxy
   */
  public String[] getDbInstances(Class<?> class1)
  {
    if (!DbSourceProxy.class.isAssignableFrom(class1))
    {
      throw new Error(MessageManager.formatMessage(
              "error.implementation_error_dbinstance_must_implement_interface",
              new String[]
              { class1.toString() }));
    }
    if (fetchableDbs == null)
    {
      return null;
    }
    List<String> sources = new ArrayList<>();
    for (Map.Entry<String, Map<String, DbSourceProxy>> entry : fetchableDbs
            .entrySet())
    {
      for (DbSourceProxy dbp : entry.getValue().values())
      {
        if (class1.isAssignableFrom(dbp.getClass()))
        {
          sources.add(entry.getKey());
        }
      }
    }
    if (sources.isEmpty())
    {
      return null;
    }
    return sources.toArray(new String[sources.size()]);
  }

  /**
   * Returns all registered proxies that are instances of the given class.
   * 
   * @param class1
   *          the class (or interface) to select by
   * @return the matching proxies, or null if there are none (including when
   *         nothing has been registered)
   */
  public DbSourceProxy[] getDbSourceProxyInstances(Class<?> class1)
  {
    String[] supported = getSupportedDb();
    if (supported == null)
    {
      return null;
    }
    List<DbSourceProxy> prlist = new ArrayList<>();
    for (String fetchable : supported)
    {
      for (DbSourceProxy pr : getSourceProxy(fetchable))
      {
        if (class1.isInstance(pr))
        {
          prlist.add(pr);
        }
      }
    }
    if (prlist.isEmpty())
    {
      return null;
    }
    return prlist.toArray(new DbSourceProxy[0]);
  }

  /**
   * Returns a preferred feature colouring scheme for the given source, or null
   * if none is defined.
   * 
   * @param source
   * @return
   */
  public FeatureSettingsModelI getFeatureColourScheme(String source)
  {
    /*
     * return the first non-null colour scheme for any proxy for
     * this database source
     */
    for (DbSourceProxy proxy : getSourceProxy(source))
    {
      FeatureSettingsModelI preferredColours = proxy
              .getFeatureColourScheme();
      if (preferredColours != null)
      {
        return preferredColours;
      }
    }
    return null;
  }
}