1# #START_LICENSE########################################################### 2# 3# 4# This file is part of the Environment for Tree Exploration program 5# (ETE). http://etetoolkit.org 6# 7# ETE is free software: you can redistribute it and/or modify it 8# under the terms of the GNU General Public License as published by 9# the Free Software Foundation, either version 3 of the License, or 10# (at your option) any later version. 11# 12# ETE is distributed in the hope that it will be useful, but WITHOUT 13# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15# License for more details. 16# 17# You should have received a copy of the GNU General Public License 18# along with ETE. If not, see <http://www.gnu.org/licenses/>. 19# 20# 21# ABOUT THE ETE PACKAGE 22# ===================== 23# 24# ETE is distributed under the GPL copyleft license (2008-2015). 25# 26# If you make use of ETE in published work, please cite: 27# 28# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. 29# ETE: a python Environment for Tree Exploration. Jaime BMC 30# Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 31# 32# Note that extra references to the specific methods implemented in 33# the toolkit may be available in the documentation. 34# 35# More info at http://etetoolkit.org. Contact: huerta@embl.de 36# 37# 38# #END_LICENSE############################################################# 39from __future__ import absolute_import 40from __future__ import print_function 41 42import os 43from ...utils import print_table 44from .configobj import ConfigObj 45from .errors import ConfigError 46from .utils import colorify 47from .apps import APP2CLASS, OPTION2APPTYPE, APPTYPES 48 49import six 50from six.moves import map 51 52def build_supermatrix_workflow(wname): 53 try: 54 cog_selector, alg_concatenator, treebuilder = map(lambda x: "@%s" %x, wname.split("-")) 55 except ValueError: 56 raise ConfigError("Invalid supermatrix workflow: %s" %wname) 57 58 workflow = {wname: { 59 "_app": "supermatrix", 60 "_cog_selector": cog_selector, 61 "_alg_concatenator": alg_concatenator, 62 "_aa_tree_builder": treebuilder, 63 "_nt_tree_builder": treebuilder, 64 "_appset":"@builtin_apps"} 65 } 66 return workflow 67 68def build_genetree_workflow(wname): 69 try: 70 aligner, trimmer, modeltester, treebuilder = map(lambda x: "none" if x.lower() == "none" else "@%s"%x, 71 wname.split("-")) 72 except ValueError: 73 raise ConfigError("Invalid genetree workflow: %s" %wname) 74 75 workflow = {wname: { 76 "_app": "genetree", 77 "_aa_aligner": aligner, 78 "_aa_alg_cleaner": trimmer, 79 "_aa_model_tester": modeltester, 80 "_aa_tree_builder": treebuilder, 81 "_nt_aligner": aligner, 82 "_nt_alg_cleaner": trimmer, 83 "_nt_model_tester": modeltester, 84 "_nt_tree_builder": treebuilder, 85 "_appset":"@builtin_apps"} 86 } 87 return workflow 88 89def list_workflows(config, target_type=None): 90 91 if not target_type or target_type == 'supermatrix': 92 print() 93 avail_meta = [(k, config["workflow_desc"].get(k, ""), len(v)) for k,v in six.iteritems(config.get('supermatrix_meta_workflow', {}))] 94 print_table(avail_meta, fix_col_width=[45, 60, 10], header=["Worflow name", "Description", "threads"], title="Supermatrix shortcut workflow names", row_line=True) 95 96 if not target_type or target_type == 'genetree': 97 print() 98 avail_meta = [(k, config["workflow_desc"].get(k, ""), len(v)) for k,v in six.iteritems(config.get('genetree_meta_workflow', {}))] 99 print_table(avail_meta, fix_col_width=[45, 60, 10], header=["Worflow name", "Description", "threads"], title="GeneTree shortcut workflow names", row_line=True) 100 101 102def list_apps(config, target_apps = None): 103 for appname, validapps in APPTYPES.items(): 104 if target_apps: 105 if appname not in target_apps and target_apps & validapps: 106 validapps = target_apps & validapps 107 elif appname in target_apps: 108 pass 109 else: 110 continue 111 112 avail_blocks = [[blockname, block["_app"], block.get("_desc", "")] for blockname, block in config.items() if block.get("_app") in validapps] 113 print_table(avail_blocks, header=["name", "app type", "desc."], max_col_width=70, title=appname, row_line=True) 114 print() 115 116def block_detail(block_name, config, color=True): 117 blocks_to_show = {} 118 iterable_types = set([set, list, tuple, frozenset]) 119 if block_name not in config: 120 try: 121 next_block = [x.lstrip('@') for x in config.get('genetree_meta_workflow', {})[block_name]] 122 metaworkflow = True 123 except Exception as e: 124 print(e) 125 raise ValueError('block name not found [%s]' %block_name) 126 else: 127 metaworkflow = False 128 next_block = [block_name] 129 130 pos = 0 131 while next_block: 132 block = next_block.pop() 133 blocks_to_show[block] = pos 134 for k1, v1 in six.iteritems(config[block]): 135 if type(v1) in iterable_types: 136 for v2 in v1: 137 if isinstance(v2, str) and v2.startswith('@'): 138 next_block.append(v2[1:]) 139 elif isinstance(v1, str) and v1.startswith('@'): 140 next_block.append(v1[1:]) 141 pos += 1 142 143 if metaworkflow and color: 144 print(colorify('[genetree_meta_workflow]', 'yellow')) 145 print("%s = %s" %(block_name, ', '.join(config["genetree_meta_workflow"][block_name]))) 146 print() 147 elif metaworkflow: 148 print('[genetree_meta_workflow]') 149 print("%s = %s" %(block_name, ', '.join(config["genetree_meta_workflow"][block_name]))) 150 print() 151 152 for b, pos in sorted(list(blocks_to_show.items()), key=lambda x: x[1]): 153 if b == "builtin_apps": 154 continue 155 if color: 156 print(colorify('[%s]' %b, 'green')) 157 else: 158 print('[%s]' %b) 159 160 for k,v in sorted(config[b].items()): 161 if k == "_inherits": 162 continue 163 if type(v) in iterable_types: 164 v = ', '.join(map(str, v))+',' 165 166 if color: 167 if k == '_app': 168 print(colorify('% 35s = %s' %(k, v), "lblue")) 169 else: 170 print('%s = %s' %(colorify("% 35s" %k, "orange"), v)) 171 else: 172 print('% 40s = %s' %(k, v)) 173 print() 174 175def parse_block(blockname, conf): 176 blocktype = conf[blockname].get('_app', 'unknown') 177 for attr, v in list(conf[blockname].items()): 178 conf[blockname][attr] = check_type(blocktype, attr, v) 179 if isinstance(conf[blockname][attr], list): 180 for i in conf[blockname][attr]: 181 check_block_link(conf, blockname, i, attr) 182 else: 183 check_block_link(conf, blockname, conf[blockname][attr], attr) 184 185 # Check for missing attributes 186 for tag, tester in six.iteritems(CHECKERS): 187 if tag[0] == blocktype and (tester[2] and tag[1] not in conf[blockname]): 188 raise ConfigError('[%s] attribute expected in block [%s]' %(tag[1], blockname)) 189 190def check_config(fname): 191 conf = ConfigObj(fname, list_values=True) 192 193 # expand meta_workflows 194 for meta_name, meta_wf in conf["genetree_meta_workflow"].items(): 195 for wkname in meta_wf: 196 conf.update(build_genetree_workflow(wkname.lstrip("@"))) 197 for meta_name, meta_wf in conf["supermatrix_meta_workflow"].items(): 198 for wkname in meta_wf: 199 conf.update(build_supermatrix_workflow(wkname.lstrip("@"))) 200 201 # expand inherits options 202 for k, v in list(conf.items()): 203 if '_inherits' in v: 204 base = v['_inherits'] 205 try: 206 new_dict = dict(conf[base]) 207 except KeyError: 208 raise ConfigError('[%s] config block is referred in [%s] but not present in config file' %(base, k)) 209 new_dict.update(v) 210 conf[k] = new_dict 211 212 # check blocks 213 for blockname in list(conf.keys()): 214 parse_block(blockname, conf) 215 216 # Check that the number of columns in main workflow definition is the same in all attributes 217 for flow_name in six.iterkeys(conf): 218 if conf[flow_name].get("_app", "") != "main": 219 continue 220 npr_config = [len(v) for k, v in six.iteritems(conf[flow_name]) 221 if type(v) == list and k != "target_levels"] 222 if len(set(npr_config)) != 1: 223 raise ConfigError("List values in [%s] should all have the same length" %flow_name) 224 return conf 225 226def check_type(blocktype, attr, v): 227 tag = (blocktype, attr) 228 if tag in CHECKERS: 229 tester, kargs, required = CHECKERS[tag] 230 return tester(v, **kargs) 231 else: 232 return v 233 234def check_block_link(conf, parent, v, attr_name): 235 if isinstance(v, str) and v.startswith('@'): 236 if v[1:] not in conf: 237 raise ConfigError('[%s] config block referred in [%s] but not found in config' %(v, parent)) 238 if attr_name in OPTION2APPTYPE and conf[v[1:]]["_app"] not in OPTION2APPTYPE[attr_name]: 239 raise ConfigError('[%s] is not valid [%s] application block' %(v, attr_name)) 240 241def is_file(value): 242 if os.path.isfile(value): 243 return value 244 else: 245 raise ConfigError("Not valid file") 246 247def is_dir(value): 248 if os.path.isdir(value): 249 return value 250 else: 251 raise ConfigError("Not valid file") 252 253def check_number(value, cast, minv=0, maxv=None): 254 try: 255 typed_value = cast(value) 256 except ValueError: 257 raise ConfigError("Expected [%s] number. Found [%s]" %(cast, value)) 258 else: 259 if (minv is not None and typed_value < cast(minv)) or \ 260 (maxv is not None and typed_value > cast(maxv)): 261 _minv = minv if minv is not None else "any" 262 _maxv = maxv if maxv is not None else "any" 263 raise ConfigError("[%s] not in the range (%s,%s)" % 264 (value, _minv, _maxv)) 265 return typed_value 266 267def is_set(value): 268 if not isinstance(value, list): 269 raise ConfigError("Expected a list of values. Found [%s]" %value) 270 return set(value) 271 272def is_appset_entry(value): 273 if not isinstance(value, list) or len(value) != 2: 274 raise ConfigError("unexpected application format [%s]. Expected [appname, maxcpus] format" %value) 275 try: 276 cores = int(value[2]) 277 except ValueError: 278 raise ConfigError("unexpected application format [%s]. Expected [appname, maxcpus] format (maxcpus as integer)" %value) 279 280 return [value[0], cores] 281 282def is_float_list(value, minv=0, maxv=None): 283 is_list(value) 284 typed_value = [] 285 for v in value: 286 typed_value.append(check_number(v, float, minv, maxv)) 287 return typed_value 288 289def is_integer_list(value, minv=0, maxv=None): 290 is_list(value) 291 typed_value = [] 292 for v in value: 293 typed_value.append(check_number(v, int, minv, maxv)) 294 return typed_value 295 296def is_float(value, minv=None, maxv=None): 297 return check_number(value, float, minv, maxv) 298 299def is_integer(value, minv=None, maxv=None): 300 return check_number(value, int, minv, maxv) 301 302 303def is_list(value): 304 if not isinstance(value, list): 305 raise ConfigError("[%s] is not a list" %value) 306 return value 307 308def is_app_link(value, allow_none=True): 309 if allow_none and value == 'none': 310 return value 311 elif value.startswith('@'): 312 return value 313 else: 314 raise ConfigError('[%s] is not a valid block link' %value) 315 316def is_app_list(value, allow_none=True): 317 is_list(value) 318 for v in value: 319 is_app_link(v, allow_none=allow_none) 320 return value 321 322 323def is_boolean(value): 324 if str(value).lower() in set(["1", "true", "yes"]): 325 return True 326 elif str(value).lower() in set(["0", "false", "no"]): 327 return False 328 else: 329 raise ConfigError('[%s] is not a boolean value' %(value)) 330 331def is_integer_list(value, maxv=None, minv=None): 332 is_list(value) 333 return [is_integer(v, maxv=maxv, minv=minv) for v in value] 334 335def is_correlative_integer_list(value, minv=None, maxv=None): 336 is_list(value) 337 typed_value = [] 338 last_value = 0 339 for v in value: 340 cv = is_integer(v, minv=None, maxv=None) 341 typed_value.append(cv) 342 if cv <= last_value: 343 raise ConfigError("[%s] Numeric values are not correlative" %value) 344 last_value = cv 345 return typed_value 346 347def is_text(value): 348 if isinstance(value, str): 349 return value 350 else: 351 raise ConfigError("[%s] is not a valid text string" %value) 352 353def is_percent(value): 354 try: 355 is_float(value.rstrip('%')) 356 except ConfigError: 357 raise ConfigError('[%s] should a percentage value (i.e. 0.4%%)' %value) 358 return value 359 360def is_integer_or_percent(value): 361 try: 362 return is_integer(value) 363 except ConfigError: 364 try: 365 is_percent(value) 366 except ConfigError: 367 raise ConfigError('[%s] should be an integer or a percentage (i.e. 15 or 0.4%%)' %value) 368 return value 369 370def is_choice(value, choices): 371 if value in choices: 372 return value 373 else: 374 raise ConfigError('[%s] should be one of %s' %(value, choices)) 375 376def is_raxml_bootstrap(value): 377 try: 378 return is_integer(value) 379 except ValueError: 380 if value == 'alrt' or value == 'alrt_phyml': 381 return value 382 else: 383 raise ConfigError('[%s] bootstrap value should an integer, "alrt" or "phyml_alrt"' %(value)) 384 385 386 387CHECKERS = { 388 # (app_name, attr_name): (checker_fn, args, required_attr) 389 ("main", "_npr"): (is_app_list, {}, True), 390 ("main", "_workflow"): (is_app_list, {"allow_none":False}, True), 391 ("main", "_appset"): (is_app_link, {"allow_none":False}, True), 392 393 ("npr", "_max_iters"): (is_integer, {"minv":1}, True), 394 ("npr", "_switch_aa_similarity"): (is_float, {"minv":0, "maxv":1}, True), 395 ("npr", "_max_seq_similarity"): (is_float, {"minv":0, "maxv":1}, True), 396 ("npr", "_min_branch_support"): (is_float, {"minv":0, "maxv":1}, True), 397 ("npr", "_min_npr_size"): (is_integer, {"minv":3}, True), 398 ("npr", "_tree_splitter"): (is_app_link, {}, True), 399 ("npr", "_target_levels"): (is_list, {}, False), 400 401 ("genetree", "_aa_aligner"): (is_app_link, {}, True), 402 ("genetree", "_nt_aligner"): (is_app_link, {}, True), 403 ("genetree", "_aa_alg_cleaner"): (is_app_link, {}, True), 404 ("genetree", "_nt_alg_cleaner"): (is_app_link, {}, True), 405 ("genetree", "_aa_model_tester"): (is_app_link, {}, True), 406 ("genetree", "_nt_model_tester"): (is_app_link, {}, True), 407 ("genetree", "_aa_tree_builder"): (is_app_link, {}, True), 408 ("genetree", "_nt_tree_builder"): (is_app_link, {}, True), 409 ("genetree", "_appset"): (is_app_link, {"allow_none":False}, True), 410 411 ("supermatrix", "_cog_selector"): (is_app_link, {}, True), 412 ("supermatrix", "_alg_concatenator"): (is_app_link, {}, True), 413 ("supermatrix", "_aa_tree_builder"): (is_app_link, {}, True), 414 ("supermatrix", "_nt_tree_builder"): (is_app_link, {}, True), 415 ("genetree", "_appset"): (is_app_link, {"allow_none":False}, True), 416 417 ("concatalg", "_default_aa_model"): (is_text, {}, True), 418 ("concatalg", "_default_nt_model"): (is_text, {}, True), 419 420 #("concatalg", "_workflow"): (is_app_link, {"allow_none":False}, True), 421 422 ("cogselector", "_species_missing_factor"): (is_float, {"minv":0, "maxv":1}, True), 423 ("cogselector", "_max_cogs"): (is_integer, {"minv":1}, True), 424 425 ("treesplitter", "_max_outgroup_size"): (is_integer_or_percent, {}, True), 426 ("treesplitter", "_min_outgroup_support"): (is_float, {"minv":0, "maxv":1}, True), 427 ("treesplitter", "_outgroup_topology_dist"): (is_boolean, {}, True), 428 ("treesplitter", "_first_split"): (is_text, {}, True), 429 430 ("metaaligner", "_aligners"): (is_app_list, {}, True), 431 ("metaaligner", "_alg_trimming"): (is_boolean, {}, True), 432 433 ("prottest", "_lk_mode"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True), 434 ("prottest", "_models"): (is_list, {}, True), 435 436 437 ("pmodeltest", "_aa_models"): (is_text, {}, True), 438 ("pmodeltest", "_nt_models"): (is_text, {}, True), 439 440 ("raxml", "_aa_model"): (is_text, {}, True), 441 ("raxml", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True), 442 ("raxml", "_bootstrap"): (is_raxml_bootstrap, {}, True), 443 ("raxml", "_model_suffix"): (is_text, {}, True), 444 445 ("raxml-sse", "_aa_model"): (is_text, {}, True), 446 ("raxml-sse", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True), 447 ("raxml-sse", "_alrt_calculation"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True), 448 449 ("raxml-avx", "_aa_model"): (is_text, {}, True), 450 ("raxml-avx", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True), 451 ("raxml-avx", "_alrt_calculation"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True), 452 453 ("appset", "muscle"): (is_appset_entry, {}, True), 454 ("appset", "mafft"): (is_appset_entry, {}, True), 455 ("appset", "clustalo"): (is_appset_entry, {}, True), 456 ("appset", "trimal"): (is_appset_entry, {}, True), 457 ("appset", "readal"): (is_appset_entry, {}, True), 458 ("appset", "tcoffee"): (is_appset_entry, {}, True), 459 ("appset", "phyml"): (is_appset_entry, {}, True), 460 ("appset", "raxml-pthreads"): (is_appset_entry, {}, True), 461 ("appset", "raxml"): (is_appset_entry, {}, True), 462 # ("appset", "raxml-pthreads-sse3"): (is_appset_entry, {}, True), 463 # ("appset", "raxml-sse3"): (is_appset_entry, {}, True), 464 # ("appset", "raxml-pthreads-avx"): (is_appset_entry, {}, True), 465 # ("appset", "raxml-avx"): (is_appset_entry, {}, True), 466 # ("appset", "raxml-pthreads-avx2"): (is_appset_entry, {}, True), 467 # ("appset", "raxml-avx2"): (is_appset_entry, {}, True), 468 ("appset", "dialigntx"): (is_appset_entry, {}, True), 469 ("appset", "fasttree"): (is_appset_entry, {}, True), 470 ("appset", "statal"): (is_appset_entry, {}, True), 471 472 473 474 } 475