1# #START_LICENSE###########################################################
2#
3#
4# This file is part of the Environment for Tree Exploration program
5# (ETE).  http://etetoolkit.org
6#
7# ETE is free software: you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# ETE is distributed in the hope that it will be useful, but WITHOUT
13# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15# License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
19#
20#
21#                     ABOUT THE ETE PACKAGE
22#                     =====================
23#
24# ETE is distributed under the GPL copyleft license (2008-2015).
25#
26# If you make use of ETE in published work, please cite:
27#
28# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon.
# ETE: a python Environment for Tree Exploration. BMC
# Bioinformatics 2010, 11:24. doi:10.1186/1471-2105-11-24
31#
32# Note that extra references to the specific methods implemented in
33# the toolkit may be available in the documentation.
34#
35# More info at http://etetoolkit.org. Contact: huerta@embl.de
36#
37#
38# #END_LICENSE#############################################################
39from __future__ import absolute_import
40from __future__ import print_function
41
42import os
43from ...utils import print_table
44from .configobj import ConfigObj
45from .errors import ConfigError
46from .utils import colorify
47from .apps import APP2CLASS, OPTION2APPTYPE, APPTYPES
48
49import six
50from six.moves import map
51
def build_supermatrix_workflow(wname):
    """Expand a supermatrix shortcut name into a workflow config block.

    ``wname`` must be made of exactly three dash-separated names, read as
    COG selector, alignment concatenator and tree builder. Each name is
    turned into an ``@name`` block link; the tree builder link is used for
    both the ``_aa_tree_builder`` and ``_nt_tree_builder`` options.

    Returns a dict with a single entry mapping ``wname`` to the new block.
    Raises ConfigError when ``wname`` does not split into three parts.
    """
    try:
        cog_selector, alg_concatenator, treebuilder = [
            "@%s" % part for part in wname.split("-")]
    except ValueError:
        raise ConfigError("Invalid supermatrix workflow: %s" %wname)

    block = {
        "_app": "supermatrix",
        "_cog_selector": cog_selector,
        "_alg_concatenator": alg_concatenator,
        "_aa_tree_builder": treebuilder,
        "_nt_tree_builder": treebuilder,
        "_appset":"@builtin_apps",
    }
    return {wname: block}
67
def build_genetree_workflow(wname):
    """Expand a genetree shortcut name into a workflow config block.

    ``wname`` must be made of exactly four dash-separated names, read as
    aligner, alignment cleaner, model tester and tree builder. A part
    equal to "none" (case-insensitive) is stored as the literal string
    ``"none"``; any other part becomes an ``@name`` block link. The same
    four values are used for the ``_aa_*`` and ``_nt_*`` options.

    Returns a dict with a single entry mapping ``wname`` to the new block.
    Raises ConfigError when ``wname`` does not split into four parts.
    """
    def as_link(part):
        if part.lower() == "none":
            return "none"
        return "@%s"%part

    try:
        aligner, trimmer, modeltester, treebuilder = [
            as_link(part) for part in wname.split("-")]
    except ValueError:
        raise ConfigError("Invalid genetree workflow: %s" %wname)

    block = {
        "_app": "genetree",
        "_aa_aligner": aligner,
        "_aa_alg_cleaner": trimmer,
        "_aa_model_tester": modeltester,
        "_aa_tree_builder": treebuilder,
        "_nt_aligner": aligner,
        "_nt_alg_cleaner": trimmer,
        "_nt_model_tester": modeltester,
        "_nt_tree_builder": treebuilder,
        "_appset":"@builtin_apps",
    }
    return {wname: block}
88
def _print_meta_workflows(config, section, title):
    """Print one table listing the meta-workflows stored in ``config[section]``.

    Each row holds the workflow name, its description taken from the
    ``workflow_desc`` section (empty when there is none) and the number of
    entries of the meta-workflow, shown under the "threads" header.
    Missing sections are treated as empty.
    """
    descriptions = config.get("workflow_desc", {})
    rows = [(name, descriptions.get(name, ""), len(flows))
            for name, flows in config.get(section, {}).items()]
    print()
    print_table(rows, fix_col_width=[45, 60, 10],
                header=["Workflow name", "Description", "threads"],
                title=title, row_line=True)


def list_workflows(config, target_type=None):
    """Print the available shortcut (meta) workflows.

    :param config: dict-like configuration object.
    :param target_type: ``'supermatrix'`` or ``'genetree'`` to print only
        that table; any false value prints both. Other values print
        nothing.
    """
    if not target_type or target_type == 'supermatrix':
        _print_meta_workflows(config, 'supermatrix_meta_workflow',
                              "Supermatrix shortcut workflow names")

    if not target_type or target_type == 'genetree':
        _print_meta_workflows(config, 'genetree_meta_workflow',
                              "GeneTree shortcut workflow names")
100
101
def list_apps(config, target_apps = None):
    """Print, for every application type in APPTYPES, a table of config blocks.

    A block is listed under an application type when its ``_app`` option is
    one of the apps registered for that type.

    :param config: dict-like configuration; every value must offer ``get``.
    :param target_apps: optional set used as a filter. A type whose name is
        in the set is printed in full; a type that merely shares apps with
        the set is printed restricted to the shared apps; any other type
        is skipped. A false value disables filtering.
    """
    for appname, validapps in APPTYPES.items():
        if target_apps and appname not in target_apps:
            shared = target_apps & validapps
            if not shared:
                continue
            validapps = shared

        rows = []
        for blockname, block in config.items():
            if block.get("_app") in validapps:
                rows.append([blockname, block["_app"], block.get("_desc", "")])
        print_table(rows, header=["name", "app type", "desc."], max_col_width=70, title=appname, row_line=True)
        print()
115
def block_detail(block_name, config, color=True):
    """Print a config block together with every block it links to.

    ``block_name`` is looked up as a top-level block first and, failing
    that, as an entry of the ``genetree_meta_workflow`` section. Starting
    from there, every string value beginning with ``@`` (directly or inside
    a set/list/tuple/frozenset) is followed to the block it names. Blocks
    are printed in the order of their last visit; ``builtin_apps`` and the
    ``_inherits`` option are never shown.

    :param block_name: block or genetree meta-workflow name.
    :param config: dict-like configuration object.
    :param color: when true, output is decorated with ``colorify``.
    :raises ValueError: if ``block_name`` cannot be resolved.
    """
    container_types = (set, list, tuple, frozenset)

    if block_name in config:
        metaworkflow = False
        pending = [block_name]
    else:
        try:
            pending = [x.lstrip('@') for x in config.get('genetree_meta_workflow', {})[block_name]]
            metaworkflow = True
        except Exception as e:
            print(e)
            raise ValueError('block name not found [%s]' %block_name)

    # Follow '@' links depth-first, remembering the step at which each
    # block was (last) taken from the stack.
    visit_step = {}
    step = 0
    while pending:
        current = pending.pop()
        visit_step[current] = step
        for _key, value in config[current].items():
            candidates = value if type(value) in container_types else [value]
            for item in candidates:
                if isinstance(item, str) and item.startswith('@'):
                    pending.append(item[1:])
        step += 1

    if metaworkflow:
        section_title = '[genetree_meta_workflow]'
        print(colorify(section_title, 'yellow') if color else section_title)
        print("%s = %s" %(block_name, ', '.join(config["genetree_meta_workflow"][block_name])))
        print()

    for name in sorted(visit_step, key=visit_step.get):
        if name == "builtin_apps":
            continue
        title = '[%s]' %name
        print(colorify(title, 'green') if color else title)

        for key, value in sorted(config[name].items()):
            if key == "_inherits":
                continue
            if type(value) in container_types:
                value = ', '.join(map(str, value))+','

            if not color:
                print('% 40s = %s' %(key, value))
            elif key == '_app':
                print(colorify('% 35s = %s' %(key, value), "lblue"))
            else:
                print('%s = %s' %(colorify("% 35s" %key, "orange"), value))
        print()
174
def parse_block(blockname, conf):
    """Validate the block ``conf[blockname]`` in place.

    Every option is passed through ``check_type`` using the block's
    ``_app`` value (``'unknown'`` when absent) and the result is stored
    back into the block. Stored values, or each item when the stored
    value is a list, are then checked with ``check_block_link``. Finally,
    every CHECKERS entry marked as required for this block type must be
    present in the block.

    :raises ConfigError: when a required attribute is missing, or from the
        individual checks.
    """
    block = conf[blockname]
    blocktype = block.get('_app', 'unknown')

    for attr, raw_value in list(block.items()):
        block[attr] = check_type(blocktype, attr, raw_value)
        # Read the value back from the block, as stored.
        stored = block[attr]
        links = stored if isinstance(stored, list) else [stored]
        for link in links:
            check_block_link(conf, blockname, link, attr)

    # Check for missing attributes
    for (apptype, attr), spec in CHECKERS.items():
        required = spec[2]
        if apptype == blocktype and (required and attr not in block):
            raise ConfigError('[%s] attribute expected in block [%s]' %(attr, blockname))
189
def check_config(fname):
    """Load the configuration file ``fname`` and validate all its blocks.

    Steps, in order:

    1. Every name listed in the ``genetree_meta_workflow`` and
       ``supermatrix_meta_workflow`` sections is expanded into a workflow
       block. A missing section is treated as empty.
    2. Blocks with an ``_inherits`` option are completed with the options
       of the block it names (the block's own options take precedence).
    3. Every block is validated with ``parse_block``.
    4. In blocks whose ``_app`` is ``main``, all list values (except the
       ``target_levels`` key) must have the same length.

    :returns: the validated ConfigObj instance.
    :raises ConfigError: on an unknown ``_inherits`` base, on mismatching
        list lengths in a main block, or from the per-block checks.
    """
    conf = ConfigObj(fname, list_values=True)

    # expand meta_workflows; .get() keeps configs lacking these optional
    # sections from failing with a bare KeyError
    for meta_wf in conf.get("genetree_meta_workflow", {}).values():
        for wkname in meta_wf:
            conf.update(build_genetree_workflow(wkname.lstrip("@")))
    for meta_wf in conf.get("supermatrix_meta_workflow", {}).values():
        for wkname in meta_wf:
            conf.update(build_supermatrix_workflow(wkname.lstrip("@")))

    # expand inherits options
    for k, v in list(conf.items()):
        if '_inherits' in v:
            base = v['_inherits']
            try:
                new_dict = dict(conf[base])
            except KeyError:
                raise ConfigError('[%s] config block is referred in [%s] but not present in config file' %(base, k))
            new_dict.update(v)
            conf[k] = new_dict

    # check blocks
    for blockname in list(conf.keys()):
        parse_block(blockname, conf)

    # Check that the number of columns in main workflow definition is the same in all attributes
    for flow_name in conf.keys():
        if conf[flow_name].get("_app", "") != "main":
            continue
        list_sizes = set(len(v) for k, v in conf[flow_name].items()
                         if isinstance(v, list) and k != "target_levels")
        if len(list_sizes) != 1:
            raise ConfigError("List values in [%s] should all have the same length" %flow_name)
    return conf
225
def check_type(blocktype, attr, v):
    """Validate ``v`` with the checker registered for ``(blocktype, attr)``.

    Returns whatever the checker returns; when CHECKERS has no entry for
    the pair, ``v`` is returned untouched.
    """
    spec = CHECKERS.get((blocktype, attr))
    if spec is None:
        return v
    tester, kargs, _required = spec
    return tester(v, **kargs)
233
def check_block_link(conf, parent, v, attr_name):
    """Check that the block link ``v`` points to a suitable block.

    Values that are not strings starting with ``@`` are ignored. For a
    link, the named block must exist in ``conf`` and, when ``attr_name``
    is listed in OPTION2APPTYPE, the block's ``_app`` must be one of the
    application types allowed for that option.

    :param parent: name of the block holding the link (used in messages).
    :raises ConfigError: if the target is missing or of the wrong type.
    """
    if not (isinstance(v, str) and v.startswith('@')):
        return

    target = v[1:]
    if target not in conf:
        raise ConfigError('[%s] config block referred in [%s] but not found in config' %(v, parent))
    if attr_name in OPTION2APPTYPE and conf[target]["_app"] not in OPTION2APPTYPE[attr_name]:
        raise ConfigError('[%s] is not valid [%s] application block' %(v, attr_name))
240
def is_file(value):
    """Return ``value`` if it is the path of an existing regular file.

    :raises ConfigError: otherwise.
    """
    if not os.path.isfile(value):
        raise ConfigError("Not valid file")
    return value
246
def is_dir(value):
    """Return ``value`` if it is the path of an existing directory.

    :raises ConfigError: otherwise.
    """
    if not os.path.isdir(value):
        # The message used to read "Not valid file" (copied from is_file).
        raise ConfigError("Not valid directory")
    return value
252
def check_number(value, cast, minv=0, maxv=None):
    """Cast ``value`` with ``cast`` and check that it lies within a range.

    :param value: raw value to convert.
    :param cast: callable used for the conversion (e.g. ``int``, ``float``);
        it is also applied to ``minv`` and ``maxv`` before comparing.
    :param minv: inclusive lower bound, or None for no lower bound.
    :param maxv: inclusive upper bound, or None for no upper bound.
    :returns: the converted value.
    :raises ConfigError: if the conversion fails or the value is out of range.
    """
    try:
        typed_value = cast(value)
    except (TypeError, ValueError):
        # TypeError covers non-scalar input such as a list or None, which
        # int()/float() reject with TypeError rather than ValueError.
        raise ConfigError("Expected [%s] number. Found [%s]" %(cast, value))

    if (minv is not None and typed_value < cast(minv)) or \
       (maxv is not None and typed_value > cast(maxv)):
        _minv = minv if minv is not None else "any"
        _maxv = maxv if maxv is not None else "any"
        raise ConfigError("[%s] not in the range (%s,%s)" %
                          (value, _minv, _maxv))
    return typed_value
266
def is_set(value):
    """Return the items of the list ``value`` as a set.

    :raises ConfigError: if ``value`` is not a list.
    """
    if isinstance(value, list):
        return set(value)
    raise ConfigError("Expected a list of values. Found [%s]" %value)
271
def is_appset_entry(value):
    """Validate an application entry of the form ``[appname, maxcpus]``.

    :param value: two-item list; the second item must be convertible to int.
    :returns: ``[appname, maxcpus]`` with ``maxcpus`` converted to int.
    :raises ConfigError: if ``value`` is not a two-item list or ``maxcpus``
        is not an integer.
    """
    if not isinstance(value, list) or len(value) != 2:
        raise ConfigError("unexpected application format [%s]. Expected [appname, maxcpus] format" %value)
    try:
        # maxcpus is the second item; index 2 does not exist in a
        # two-item list and raised IndexError for every valid entry.
        cores = int(value[1])
    except (TypeError, ValueError):
        raise ConfigError("unexpected application format [%s]. Expected [appname, maxcpus] format (maxcpus as integer)" %value)

    return [value[0], cores]
281
def is_float_list(value, minv=0, maxv=None):
    """Validate ``value`` as a list and convert each item to float.

    Each item is checked with ``check_number`` against the inclusive range
    ``[minv, maxv]`` (None disables a bound).

    :returns: a new list with the converted items.
    :raises ConfigError: from ``is_list`` or ``check_number``.
    """
    is_list(value)
    return [check_number(item, float, minv, maxv) for item in value]
288
def is_integer_list(value, minv=0, maxv=None):
    """Validate ``value`` as a list and convert each item to int.

    NOTE: this module defines ``is_integer_list`` a second time further
    down; being bound later, that definition is the one the name refers
    to once the module is loaded.

    :returns: a new list with the converted items.
    :raises ConfigError: from ``is_list`` or ``check_number``.
    """
    is_list(value)
    return [check_number(item, int, minv, maxv) for item in value]
295
def is_float(value, minv=None, maxv=None):
    """Convert ``value`` to float, enforcing the optional inclusive bounds.

    Thin wrapper around ``check_number``; both bounds default to None
    (unbounded).
    """
    return check_number(value, float, minv=minv, maxv=maxv)
298
def is_integer(value, minv=None, maxv=None):
    """Convert ``value`` to int, enforcing the optional inclusive bounds.

    Thin wrapper around ``check_number``; both bounds default to None
    (unbounded).
    """
    return check_number(value, int, minv=minv, maxv=maxv)
301
302
def is_list(value):
    """Return ``value`` unchanged if it is a list.

    :raises ConfigError: otherwise.
    """
    if isinstance(value, list):
        return value
    raise ConfigError("[%s] is not a list" %value)
307
def is_app_link(value, allow_none=True):
    """Return ``value`` if it is a block link, i.e. a string starting with ``@``.

    :param allow_none: when true, the literal string ``'none'`` is also
        accepted.
    :raises ConfigError: for anything else, including non-string values.
    """
    if allow_none and value == 'none':
        return value

    # A non-string value (for instance a list) has no usable
    # startswith(); report it as an invalid link rather than letting
    # AttributeError/TypeError escape.
    try:
        is_link = value.startswith('@')
    except (AttributeError, TypeError):
        is_link = False

    if is_link:
        return value
    raise ConfigError('[%s] is not a valid block link' %value)
315
def is_app_list(value, allow_none=True):
    """Validate ``value`` as a list whose items are all block links.

    Each item is checked with ``is_app_link`` using the same
    ``allow_none`` setting.

    :returns: ``value`` itself.
    :raises ConfigError: from ``is_list`` or ``is_app_link``.
    """
    is_list(value)
    for link in value:
        is_app_link(link, allow_none=allow_none)
    return value
321
322
def is_boolean(value):
    """Interpret ``value`` as a boolean flag.

    The string form of ``value`` is compared case-insensitively:
    "1", "true" and "yes" give True; "0", "false" and "no" give False.

    :raises ConfigError: for any other value.
    """
    text = str(value).lower()
    if text in ("1", "true", "yes"):
        return True
    if text in ("0", "false", "no"):
        return False
    raise ConfigError('[%s] is not a boolean value' %(value))
330
def is_integer_list(value, maxv=None, minv=None):
    """Validate ``value`` as a list and convert each item with ``is_integer``.

    Note the parameter order: ``maxv`` comes before ``minv``. Both are
    inclusive bounds and default to None (unbounded).

    :returns: a new list with the converted items.
    :raises ConfigError: from ``is_list`` or ``is_integer``.
    """
    is_list(value)
    converted = []
    for item in value:
        converted.append(is_integer(item, maxv=maxv, minv=minv))
    return converted
334
def is_correlative_integer_list(value, minv=None, maxv=None):
    """Validate ``value`` as a strictly increasing list of integers.

    Every item is converted with ``is_integer`` using the given inclusive
    bounds and must be greater than the previous one. The comparison
    starts from 0, so the first item has to be positive.

    :param minv: inclusive lower bound for each item, or None.
    :param maxv: inclusive upper bound for each item, or None.
    :returns: a new list with the converted items.
    :raises ConfigError: if ``value`` is not a list, an item is not a valid
        integer within bounds, or the sequence is not increasing.
    """
    is_list(value)
    typed_value = []
    last_value = 0
    for v in value:
        # Forward the caller's bounds; they used to be replaced by None,
        # which silently disabled the range check.
        cv = is_integer(v, minv=minv, maxv=maxv)
        typed_value.append(cv)
        if cv <= last_value:
            raise ConfigError("[%s] Numeric values are not correlative" %value)
        last_value = cv
    return typed_value
346
def is_text(value):
    """Return ``value`` unchanged if it is a ``str`` instance.

    :raises ConfigError: otherwise.
    """
    if not isinstance(value, str):
        raise ConfigError("[%s] is not a valid text string" %value)
    return value
352
def is_percent(value):
    """Check that ``value`` is a number optionally followed by ``%``.

    Trailing ``%`` characters are stripped and the rest must be accepted
    by ``is_float``.

    :returns: ``value`` unchanged (the percent sign is kept).
    :raises ConfigError: if ``value`` is not a string-like object or the
        numeric part is not a valid float.
    """
    try:
        is_float(value.rstrip('%'))
    except (AttributeError, ConfigError):
        # AttributeError: value has no rstrip() (e.g. a list); report it
        # as a configuration problem instead of crashing.
        raise ConfigError('[%s] should be a percentage value (i.e. 0.4%%)' %value)
    return value
359
def is_integer_or_percent(value):
    """Accept either an integer or a percentage.

    :returns: the value converted to int when ``is_integer`` accepts it;
        otherwise ``value`` unchanged, provided ``is_percent`` accepts it.
    :raises ConfigError: if neither check succeeds.
    """
    try:
        result = is_integer(value)
    except ConfigError:
        # Not an integer: fall back to the percentage form.
        try:
            is_percent(value)
        except ConfigError:
            raise ConfigError('[%s] should be an integer or a percentage (i.e. 15 or 0.4%%)' %value)
        result = value
    return result
369
def is_choice(value, choices):
    """Return ``value`` if it is contained in ``choices``.

    :raises ConfigError: otherwise.
    """
    if value not in choices:
        raise ConfigError('[%s] should be one of %s' %(value, choices))
    return value
375
def is_raxml_bootstrap(value):
    """Validate a RAxML bootstrap setting.

    :returns: the value converted to int when it is an integer; otherwise
        ``value`` unchanged if it is ``'alrt'`` or ``'alrt_phyml'``.
    :raises ConfigError: for any other value.
    """
    try:
        return is_integer(value)
    except (ConfigError, TypeError, ValueError):
        # is_integer() reports a failed conversion as ConfigError (see
        # check_number), so catching only ValueError would miss it unless
        # ConfigError derives from ValueError, which is not visible here.
        if value in ('alrt', 'alrt_phyml'):
            return value
        # The message now names the keyword the code actually accepts.
        raise ConfigError('[%s] bootstrap value should be an integer, "alrt" or "alrt_phyml"' %(value))
384
385
386
# Validation table used by check_type() and parse_block().
# Each key is an (app type, attribute name) pair; each value is a tuple
# (checker function, keyword arguments for the checker, required flag).
# parse_block() raises ConfigError when an attribute flagged as required is
# missing from a block of the given app type.
CHECKERS = {
    # (app_name, attr_name): (checker_fn, args, required_attr)
    ("main", "_npr"): (is_app_list, {}, True),
    ("main", "_workflow"): (is_app_list, {"allow_none":False}, True),
    ("main", "_appset"): (is_app_link, {"allow_none":False}, True),

    ("npr", "_max_iters"): (is_integer, {"minv":1}, True),
    ("npr", "_switch_aa_similarity"): (is_float, {"minv":0, "maxv":1}, True),
    ("npr", "_max_seq_similarity"): (is_float, {"minv":0, "maxv":1}, True),
    ("npr", "_min_branch_support"): (is_float, {"minv":0, "maxv":1}, True),
    ("npr", "_min_npr_size"): (is_integer, {"minv":3}, True),
    ("npr", "_tree_splitter"): (is_app_link, {}, True),
    ("npr", "_target_levels"): (is_list, {}, False),

    ("genetree", "_aa_aligner"): (is_app_link, {}, True),
    ("genetree", "_nt_aligner"): (is_app_link, {}, True),
    ("genetree", "_aa_alg_cleaner"): (is_app_link, {}, True),
    ("genetree", "_nt_alg_cleaner"): (is_app_link, {}, True),
    ("genetree", "_aa_model_tester"): (is_app_link, {}, True),
    ("genetree", "_nt_model_tester"): (is_app_link, {}, True),
    ("genetree", "_aa_tree_builder"): (is_app_link, {}, True),
    ("genetree", "_nt_tree_builder"): (is_app_link, {}, True),
    ("genetree", "_appset"): (is_app_link, {"allow_none":False}, True),

    ("supermatrix", "_cog_selector"): (is_app_link, {}, True),
    ("supermatrix", "_alg_concatenator"): (is_app_link, {}, True),
    ("supermatrix", "_aa_tree_builder"): (is_app_link, {}, True),
    ("supermatrix", "_nt_tree_builder"): (is_app_link, {}, True),
    # This key used to repeat ("genetree", "_appset"), which only overwrote
    # the identical entry above and left supermatrix blocks unchecked.
    ("supermatrix", "_appset"): (is_app_link, {"allow_none":False}, True),

    ("concatalg", "_default_aa_model"): (is_text, {}, True),
    ("concatalg", "_default_nt_model"): (is_text, {}, True),

    #("concatalg", "_workflow"): (is_app_link, {"allow_none":False}, True),

    ("cogselector", "_species_missing_factor"): (is_float, {"minv":0, "maxv":1}, True),
    ("cogselector", "_max_cogs"): (is_integer, {"minv":1}, True),

    ("treesplitter", "_max_outgroup_size"): (is_integer_or_percent, {}, True),
    ("treesplitter", "_min_outgroup_support"): (is_float, {"minv":0, "maxv":1}, True),
    ("treesplitter", "_outgroup_topology_dist"): (is_boolean, {}, True),
    ("treesplitter", "_first_split"): (is_text, {}, True),

    ("metaaligner", "_aligners"): (is_app_list, {}, True),
    ("metaaligner", "_alg_trimming"): (is_boolean, {}, True),

    ("prottest", "_lk_mode"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True),
    ("prottest", "_models"): (is_list, {}, True),


    ("pmodeltest", "_aa_models"): (is_text, {}, True),
    ("pmodeltest", "_nt_models"): (is_text, {}, True),

    ("raxml", "_aa_model"): (is_text, {}, True),
    ("raxml", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True),
    ("raxml", "_bootstrap"): (is_raxml_bootstrap, {}, True),
    ("raxml", "_model_suffix"): (is_text, {}, True),

    ("raxml-sse", "_aa_model"): (is_text, {}, True),
    ("raxml-sse", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True),
    ("raxml-sse", "_alrt_calculation"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True),

    ("raxml-avx", "_aa_model"): (is_text, {}, True),
    ("raxml-avx", "_method"): (is_choice, {"choices":set(['GAMMA', 'CAT'])}, True),
    ("raxml-avx", "_alrt_calculation"): (is_choice, {"choices":set(['phyml', 'raxml'])}, True),

    ("appset", "muscle"): (is_appset_entry, {}, True),
    ("appset", "mafft"): (is_appset_entry, {}, True),
    ("appset", "clustalo"): (is_appset_entry, {}, True),
    ("appset", "trimal"): (is_appset_entry, {}, True),
    ("appset", "readal"): (is_appset_entry, {}, True),
    ("appset", "tcoffee"): (is_appset_entry, {}, True),
    ("appset", "phyml"): (is_appset_entry, {}, True),
    ("appset", "raxml-pthreads"): (is_appset_entry, {}, True),
    ("appset", "raxml"): (is_appset_entry, {}, True),
    # ("appset", "raxml-pthreads-sse3"): (is_appset_entry, {}, True),
    # ("appset", "raxml-sse3"): (is_appset_entry, {}, True),
    # ("appset", "raxml-pthreads-avx"): (is_appset_entry, {}, True),
    # ("appset", "raxml-avx"): (is_appset_entry, {}, True),
    # ("appset", "raxml-pthreads-avx2"): (is_appset_entry, {}, True),
    # ("appset", "raxml-avx2"): (is_appset_entry, {}, True),
    ("appset", "dialigntx"): (is_appset_entry, {}, True),
    ("appset", "fasttree"): (is_appset_entry, {}, True),
    ("appset", "statal"): (is_appset_entry, {}, True),



    }
475