1# #START_LICENSE###########################################################
2#
3#
4# This file is part of the Environment for Tree Exploration program
5# (ETE).  http://etetoolkit.org
6#
7# ETE is free software: you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# ETE is distributed in the hope that it will be useful, but WITHOUT
13# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15# License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
19#
20#
21#                     ABOUT THE ETE PACKAGE
22#                     =====================
23#
24# ETE is distributed under the GPL copyleft license (2008-2015).
25#
26# If you make use of ETE in published work, please cite:
27#
28# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon.
# ETE: a python Environment for Tree Exploration. BMC Bioinformatics
# 2010, 11:24. doi:10.1186/1471-2105-11-24
31#
32# Note that extra references to the specific methods implemented in
33# the toolkit may be available in the documentation.
34#
35# More info at http://etetoolkit.org. Contact: huerta@embl.de
36#
37#
38# #END_LICENSE#############################################################
from __future__ import absolute_import
from __future__ import print_function

from collections import defaultdict
from functools import cmp_to_key
import logging

import six
from six.moves import range

from ..utils import (DEBUG, GLOBALS, SeqGroup, tobool, sec2time, read_time_file,
                     _max, _min, _mean, _std, _median, cmp)
from ..apps import APP2CLASS
from .. import task as all_tasks
from .. import db
from ..errors import ConfigError, DataError, TaskError
from ..master_task import register_task_recursively, isjob
54
55log = logging.getLogger("main")
56
class IterConfig(dict):
    """Workflow-configuration view used at each NPR iteration.

    Behaves like the plain ``conf[wkname]`` dict, but resolves each
    parameter according to the properties of the current task (``size``
    and ``seqtype``) and exposes entries as attributes for convenience.
    """

    def __init__(self, conf, wkname, size, seqtype):
        """Special dict to extract the value of each parameter given
         the properties of a task: size and seqtype.
        """
        dict.__init__(self, conf[wkname])

        self.conf = conf        # full application config (all sections)
        self.seqtype = seqtype  # used as key prefix in __getitem__ ("nt"/"aa" bindings)
        self.size = size        # task size (number of sequences, per docstring)

        # NPR-wide settings copied from the special "_npr" config section.
        self['npr_wf_type'] = conf['_npr'].get('wf_type', None)
        self['npr_workflows'] = conf['_npr'].get('workflows', [])
        self['switch_aa_similarity'] = conf['_npr'].get('nt_switch_thr', 1.0)
        # Iterations beyond the first are only allowed when this workflow's
        # app matches the configured NPR workflow type.
        if conf[wkname]["_app"] == self['npr_wf_type']:
            self['max_iters'] = conf['_npr'].get('max_iters', 1)  # 1 = no npr by default!
        else:
            self['max_iters'] = 1

        self['_tree_splitter'] = '@default_tree_splitter'

        # if max_outgroup size is 0, means that no rooting is done in child NPR trees
        self['use_outgroup'] = conf['default_tree_splitter']['_max_outgroup_size'] != 0

    def __getattr__(self, v):
        # Attribute access falls through to item lookup: the dict type
        # defines no __getattr__, so the first lookup always raises
        # AttributeError and we delegate to __getitem__ (which performs
        # the seqtype/underscore resolution below).
        try:
            return dict.__getattr__(self, v)
        except AttributeError:
            return self.__getitem__(v)

    def __getitem__(self, v):
        # Automatically switch among nt and aa bindings
        if v in set(["tree_builder", "aligner", "model_tester",
                     "alg_cleaner"]):
            v = "%s_%s" %(self.seqtype, v)

        try:
            # Underscore-prefixed keys hold raw config values that may need
            # resolving ("none" / "@section" references).
            value = dict.__getitem__(self, "_%s" %v)
        except KeyError as e:
            # No "_" variant: return the plain entry (a missing key still
            # raises KeyError to the caller).
            return dict.__getitem__(self, v)
        else:
            # If list, let's take the correct element
            if type(value) == list:
                raise ValueError('This should not occur. Please report the error!')

            if type(value) != str:
                return value
            elif value.lower() == "none":
                # Mirrors the (config_name, task_class) pair shape returned
                # for "@" references below, with both slots disabled.
                return None, None
            elif value.startswith("@"):
                # "@section" points at another config block; resolve it to
                # (section_name, task_class) via the app registry.
                classname = APP2CLASS[self.conf[value[1:]]["_app"]]
                return value[1:], getattr(all_tasks, classname)
            else:
                return value
110
def process_new_tasks(task, new_tasks, conf):
    """Register newly generated tasks and attach them to the workflow
    context (processor, config/thread ids, main tree) of their parent."""
    parent_taskid = task.taskid if task else None
    for new_task in new_tasks:
        log.log(22, "Registering new task: %s", new_task)
        register_task_recursively(new_task, parentid=parent_taskid)
        node_entry = conf["_nodeinfo"][new_task.nodeid]
        node_entry.setdefault("tasks", []).append(new_task)
        if not task:
            continue
        # Clone processor, in case tasks belong to a side workflow
        new_task.task_processor = task.task_processor
        new_task.configid = task.configid
        new_task.threadid = task.threadid
        new_task.main_tree = task.main_tree
        # NPR may switch the workflow of child tasks; inherit the parent's
        # target workflow name only when the child has none of its own.
        if not hasattr(new_task, "target_wkname"):
            new_task.target_wkname = task.target_wkname
133
def inc_iternumber(threadid):
    """Advance the iteration counter of *threadid* and return its new value."""
    next_iter = get_iternumber(threadid) + 1
    GLOBALS["threadinfo"][threadid]["last_iter"] = next_iter
    return next_iter
138
def get_iternumber(threadid):
    """Return the current iteration number of *threadid*, initializing it to 1."""
    thread_entry = GLOBALS["threadinfo"][threadid]
    return thread_entry.setdefault("last_iter", 1)
141
def get_identity(fname):
    """Compute per-column identity statistics for the alignment in *fname*.

    A column's identity is the frequency of its most common non-gap
    residue; fully gapped columns are skipped.

    :param fname: path (or anything SeqGroup accepts) to the alignment.
    :returns: tuple (max, min, mean, std) over column identities.
    """
    s = SeqGroup(fname)
    # The alignment length is the length of one sequence, not the number of
    # sequences: the original `len(six.itervalues(...))` raised TypeError on
    # the iterator and, conceptually, counted sequences instead of columns
    # (compare with get_seqs_identity below).
    seqlen = len(next(six.itervalues(s.id2seq)))
    ident = list()
    for i in range(seqlen):
        states = defaultdict(int)
        for seq in six.itervalues(s.id2seq):
            if seq[i] != "-":
                states[seq[i]] += 1
        values = list(states.values())
        if values:
            ident.append(float(max(values)) / sum(values))
    return (_max(ident), _min(ident),
            _mean(ident), _std(ident))
156
157
def get_seqs_identity(alg, seqs):
    ''' Returns alg statistics regarding a set of sequences'''
    ncols = len(alg.get_seq(seqs[0]))
    column_identity = []
    for col in range(ncols):
        # Count non-gap residues observed at this column.
        residue_counts = defaultdict(int)
        for seq_id in seqs:
            residue = alg.get_seq(seq_id)[col]
            if residue != "-":
                residue_counts[residue] += 1
        counts = list(residue_counts.values())
        if counts:
            # Identity = frequency of the most common residue.
            column_identity.append(float(max(counts)) / sum(counts))
    return (_max(column_identity), _min(column_identity),
            _mean(column_identity), _std(column_identity))
173
174
def split_tree(task_tree_node, task_outgroups, main_tree, alg_path, npr_conf, threadid, target_cladeids):
    """Browses a task tree from root to leaves and yields next
    suitable nodes for NPR iterations. Each yielded node comes with
    the set of target and outgroup tips.

    :param task_tree_node: node of main_tree where the last optimized tree
        was merged; browsing starts there.
    :param task_outgroups: outgroup leaf names of the parent task, used to
        detect a child partition identical to its parent's.
    :param main_tree: full current tree (provides cached node content).
    :param alg_path: "taskid.datatype" DB pointer to the alignment, or a
        false value to disable the sequence-similarity filters.
    :param npr_conf: IterConfig providing npr_workflows, use_outgroup and
        tree_splitter settings.
    :param threadid: key into GLOBALS for per-thread state.
    :param target_cladeids: optional collection restricting candidates
        by cladeid.

    Yields (node, target_seq_names, outgroup_seq_names, workflow_name).
    """


    def processable_node(_n):
        """This is an internal function that returns true if a given node
        is suitable for a NPR iteration. It can be used as
        "is_leaf_fn" when traversing a tree.

        Note that this function uses several variables which change within the
        split_tree function (master_node, _TARGET_NODES, n2content, ALG),
        so must be kept within its namespace.

        """
        is_leaf = False
        for wkname, wkfilter in npr_conf.npr_workflows:
            # if node is not in the targets or does not meet size filters, skip
            # workflow
            if _n is master_node or \
               (_TARGET_NODES and _n not in _TARGET_NODES) or \
               (target_cladeids and _n.cladeid not in target_cladeids) or \
               len(n2content[_n]) < max(wkfilter.get("min_size", 3), 3) or \
               ("max_size" in wkfilter and len(n2content[_n]) > wkfilter["max_size"]):
                continue

            # If seq_sim filter used, calculate node stats
            if ALG and ("min_seq_sim" in wkfilter or "max_seq_sim" in wkfilter):
                if not hasattr(_n, "seqs_mean_ident"):
                    log.log(20, "Calculating node sequence stats...")
                    mx, mn, avg, std = get_seqs_identity(ALG,
                                                         [__n.name for __n in n2content[_n]])
                    _n.add_features(seqs_max_ident=mx, seqs_min_ident=mn,
                                    seqs_mean_ident=avg, seqs_std_ident=std)
                    log.log(20, "mx=%s, mn=%s, avg=%s, std=%s" %(mx, mn, avg, std))

                # NOTE(review): assumes both "min_seq_sim" and "max_seq_sim"
                # are defined whenever either is; a filter carrying only
                # "max_seq_sim" would raise KeyError here — confirm configs.
                if _n.seqs_mean_ident < wkfilter["min_seq_sim"]:
                    continue

                if _n.seqs_mean_ident > wkfilter["max_seq_sim"]:
                    continue


            else:
                _n.add_features(seqs_max_ident=None, seqs_min_ident=None,
                                seqs_mean_ident=None, seqs_std_ident=None)

            if "min_support" in wkfilter:
                # If we are optimizing only lowly supported nodes, and nodes are
                # optimized without an outgroup, our target node is actually the
                # parent of lowly supported nodes. Therefore, I check if support
                # is low in children nodes, and return this node if so.
                if not npr_conf.use_outgroup:
                    if not [_ch for _ch in _n.children if _ch.support <= wkfilter["min_support"]]:
                        continue
                # Otherwise, just skip the node if it above the min support
                elif _n.support > wkfilter["min_support"]:
                    continue

            # At this point the node passed all filters of this workflow,
            # so it can be optimized
            is_leaf = True
            _n._target_wkname = wkname
            break

        return is_leaf

    log.log(20, "Loading tree content...")
    n2content = main_tree.get_cached_content()
    if alg_path:
        log.log(20, "Loading associated alignment to check seq. similarity")
        raw_alg = db.get_task_data(*alg_path.split("."))
        ALG = SeqGroup(raw_alg)
    else:
        ALG = None

    log.log(20, "Finding next NPR nodes...")
    # task_tree_node is actually a node in main_tree, since it has been
    # already merged
    trees_to_browse = [task_tree_node]
    npr_nodes = 0
    # loads current tree content, so we can check not reconstructing exactly the
    # same tree
    tasktree_content = set([leaf.name for leaf in n2content[task_tree_node]]) | set(task_outgroups)
    while trees_to_browse:
        master_node = trees_to_browse.pop()

        # if custom taxa levels are defined as targets, find them in this
        # subtree
        _TARGET_NODES = defaultdict(list) # this container is used by
                                          # processable_node function
        opt_levels = GLOBALS[threadid].get('_optimized_levels', None)
        if opt_levels is not None:
            # any descendant of the already processed node is suitable for
            # selection. If the ancestor of level-species is on top of the
            # task_tree_node, it will be discarded
            avail_nodes = set(master_node.get_descendants())
            for lin in opt_levels:
                sp2lin, lin2sp = GLOBALS["lineages"]
                optimized, strict_monophyly = opt_levels[lin]
                if not optimized:
                    ancestor = main_tree.get_common_ancestor(*lin2sp[lin])
                    if ancestor in avail_nodes:
                        # check that the node satisfies level monophyly config
                        ancestor_content = set([x.name for x in n2content[ancestor]])
                        if not strict_monophyly or lin2sp[lin] == ancestor_content:
                            _TARGET_NODES[ancestor].append(lin)
                        elif strict_monophyly:
                            log.log(26, "Discarding not monophyletic level @@11:%s@@1:" %lin)
                    else:
                        log.log(26, "Discarding upper clade @@11:%s@@1:" %lin)

        for node in master_node.iter_leaves(is_leaf_fn=processable_node):
            if opt_levels:
                log.log(28, "Trying to optimizing custom tree level: @@11:%s@@1:" %_TARGET_NODES[node])
                for lin in _TARGET_NODES[node]:
                    # Marks the level as optimized, so is not computed again
                    opt_levels[lin][0] = True

            log.log(28, "Found possible target node of size %s branch support %f" %(len(n2content[node]), node.support))
            log.log(28, "First suitable workflow: %s" %(node._target_wkname))

            # Finds best outgroup for the target node
            if npr_conf.use_outgroup:
                splitterconfname, _ = npr_conf.tree_splitter
                splitterconf = GLOBALS[threadid][splitterconfname]
                #seqs, outs = select_outgroups(node, n2content, splitterconf)
                #seqs, outs = select_closest_outgroup(node, n2content, splitterconf)
                seqs, outs = select_sister_outgroup(node, n2content, splitterconf)
            else:
                # No rooting requested: whole node content is the target.
                seqs = set([_i.name for _i in n2content[node]])
                outs = set()

            # Skip candidates whose target+outgroup set equals the parent
            # partition (would rebuild exactly the same tree); keep browsing
            # their subtrees instead.
            if seqs | outs == tasktree_content:
                log.log(26, "Discarding target node of size %s, due to identity with its parent node" %len(n2content[node]))
                trees_to_browse.append(node)
            else:
                npr_nodes += 1
                yield node, seqs, outs, node._target_wkname
    log.log(28, "%s nodes will be optimized", npr_nodes)
321
def get_next_npr_node(threadid, ttree, task_outgroups, mtree, alg_path, npr_conf, target_cladeids=None):
    """Yield (node, seqs, outs, wkname) tuples selected for a new NPR
    iteration, advancing the thread's global iteration counter per yield.

    Returns immediately (yielding nothing) when NPR is disabled or when
    the iteration cap has already been reached.
    """
    current_iter = get_iternumber(threadid)
    if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
        log.warning("Maximum number of iterations reached!")
        return

    if not npr_conf.npr_workflows:
        log.log(26, "NPR is disabled")
        return

    for node, seqs, outs, wkname in split_tree(ttree, task_outgroups, mtree, alg_path,
                                               npr_conf, threadid, target_cladeids):
        if npr_conf.max_iters and current_iter < npr_conf.max_iters:
            log.log(28,
                    "@@16:Target node of size %s with %s outgroups marked for a new NPR iteration!@@1:" %(
                        len(seqs),
                        len(outs)))
            # Yield new iteration. Keep the local counter in sync with the
            # global one: the original kept a stale value, so the max_iters
            # cap could be exceeded within a single call.
            current_iter = inc_iternumber(threadid)
            yield node, seqs, outs, wkname
342
def select_closest_outgroup(target, n2content, splitterconf):
    """Pick the closest valid outgroup node for *target*.

    Candidates must not overlap the target's content, must meet the
    minimum support cutoff, and must not exceed the maximum outgroup
    size (a fraction of the target size). Ties are broken by node size,
    support and, finally, cladeid for reproducibility.

    :returns: (target_seq_names, outgroup_seq_names) as sets.
    :raises TaskError: for the root node or when no candidate passes.
    """
    def sort_outgroups(x, y):
        # Old-style cmp comparator over (node, distance) pairs.
        r = cmp(x[1], y[1]) # closer node
        if r == 0:
            r = -1 * cmp(len(n2content[x[0]]), len(n2content[y[0]])) # larger node
            if r == 0:
                r = -1 * cmp(x[0].support, y[0].support) # higher supported node
                if r == 0:
                    return cmp(x[0].cladeid, y[0].cladeid) # by content name
        return r

    if not target.up:
        raise TaskError(None, "Cannot select outgroups for the root node!")

    # Prepare cutoffs
    out_topodist = tobool(splitterconf["_outgroup_topology_dist"])
    max_outgroup_size = max(int(float(splitterconf["_max_outgroup_size"]) * len(n2content[target])), 1)
    out_min_support = float(splitterconf["_min_outgroup_support"])

    log.log(26, "Max outgroup size allowed %d" %max_outgroup_size)

    # Gets a list of outside nodes and their distance to current target node
    n2targetdist = distance_matrix_new(target, leaf_only=False,
                                               topology_only=out_topodist)

    # sorted() no longer accepts a positional cmp function in Python 3;
    # wrap the comparator with functools.cmp_to_key.
    valid_nodes = sorted([(node, ndist) for node, ndist in six.iteritems(n2targetdist)
                          if not(n2content[node] & n2content[target])
                          and node.support >= out_min_support
                          and len(n2content[node])<=max_outgroup_size],
                         key=cmp_to_key(sort_outgroups))
    if valid_nodes:
        best_outgroup = valid_nodes[0][0]
    else:
        # Debug dump of every candidate; plain lexicographic sort (the old
        # code passed the pair comparator to a list of strings, which could
        # not work).
        print('\n'.join(sorted(["%s Size:%d Dist:%f Supp:%f" %(node.cladeid, len(n2content[node]), ndist, node.support)
                                for node, ndist in six.iteritems(n2targetdist)])))
        raise TaskError(None, "Could not find a suitable outgroup!")

    log.log(20,
            "Found possible outgroup Size:%d Distance:%f Support:%f",
            len(n2content[best_outgroup]), n2targetdist[best_outgroup], best_outgroup.support)

    log.log(20, "Supports: %0.2f (children=%s)", best_outgroup.support,
            ','.join(["%0.2f" % ch.support for ch in
                      best_outgroup.children]))

    log.log(24, "best outgroup topology:\n%s", best_outgroup)

    seqs = [n.name for n in n2content[target]]
    outs = [n.name for n in n2content[best_outgroup]]

    return set(seqs), set(outs)
402
403
def select_sister_outgroup(target, n2content, splitterconf):
    """Pick the closest valid outgroup for *target* from within its
    sister clade.

    Like select_closest_outgroup, but candidates must additionally be a
    subset of the sister-node content. "_max_outgroup_size" may be given
    as a percentage ("10%") of the target size or an absolute count.

    :returns: (target_seq_names, outgroup_seq_names) as sets.
    :raises TaskError: for the root node or when no candidate passes.
    """
    def sort_outgroups(x, y):
        # Old-style cmp comparator over (node, distance) pairs.
        r = cmp(x[1], y[1]) # closer node
        if r == 0:
            r = -1 * cmp(len(n2content[x[0]]), len(n2content[y[0]])) # larger node
            if r == 0:
                r = -1 * cmp(x[0].support, y[0].support) # higher supported node
                if r == 0:
                    return cmp(x[0].cladeid, y[0].cladeid) # by content name
        return r

    if not target.up:
        raise TaskError(None, "Cannot select outgroups for the root node!")

    # Prepare cutoffs
    out_topodist = tobool(splitterconf["_outgroup_topology_dist"])
    out_min_support = float(splitterconf["_min_outgroup_support"])
    if splitterconf["_max_outgroup_size"].strip().endswith("%"):
        max_outgroup_size = max(1, round((float(splitterconf["_max_outgroup_size"].strip("%"))/100) * len(n2content[target])))
        log.log(26, "Max outgroup size allowed %s = %d" %(splitterconf["_max_outgroup_size"], max_outgroup_size))
    else:
        max_outgroup_size = max(1, int(splitterconf["_max_outgroup_size"]))
        log.log(26, "Max outgroup size allowed %d" %max_outgroup_size)

    # Gets a list of outside nodes and their distance to current target node
    n2targetdist = distance_matrix_new(target, leaf_only=False,
                                               topology_only=out_topodist)

    # NOTE(review): only the first sister is considered; in a multifurcation
    # other sisters are ignored -- confirm this is intended.
    sister_content = n2content[target.get_sisters()[0]]

    # sorted() no longer accepts a positional cmp function in Python 3;
    # wrap the comparator with functools.cmp_to_key.
    valid_nodes = sorted([(node, ndist) for node, ndist in six.iteritems(n2targetdist)
                          if not(n2content[node] & n2content[target])
                          and n2content[node].issubset(sister_content)
                          and node.support >= out_min_support
                          and len(n2content[node])<=max_outgroup_size],
                         key=cmp_to_key(sort_outgroups))
    if valid_nodes:
        best_outgroup = valid_nodes[0][0]
    else:
        # Debug dump of every candidate; plain lexicographic sort (the old
        # code passed the pair comparator to a list of strings, which could
        # not work).
        print('\n'.join(sorted(["%s Size:%d Distance:%f Support:%f" %(node.cladeid, len(n2content[node]), ndist, node.support)
                                for node, ndist in six.iteritems(n2targetdist)])))
        raise TaskError(None, "Could not find a suitable outgroup!")

    log.log(20,
            "Found possible outgroup Size:%d Dist:%f Supp:%f",
            len(n2content[best_outgroup]), n2targetdist[best_outgroup], best_outgroup.support)

    log.log(20, "Supports: %0.2f (children=%s)", best_outgroup.support,
            ','.join(["%0.2f" % ch.support for ch in
                      best_outgroup.children]))

    log.log(24, "best outgroup topology:\n%s", best_outgroup)

    seqs = [n.name for n in n2content[target]]
    outs = [n.name for n in n2content[best_outgroup]]

    return set(seqs), set(outs)
469
470
471
472
473
474
def select_outgroups(target, n2content, splitterconf):
    """Given a set of target sequences, find the best set of out
    sequences to use.

    Candidates are ranked by a (support, size closeness to the optimal
    outgroup size, proximity to target) score; the best-scoring node is
    used as outgroup.

    :returns: (target_seq_names, outgroup_seq_names) as sets.
    :raises TaskError: for the root node, a zero outgroup size, or when
        no candidate meets the support cutoff.
    """

    #policy = splitterconf["_outgroup_policy"]  # node or leaves
    out_topodist = tobool(splitterconf["_outgroup_topology_dist"])
    optimal_out_size = int(splitterconf["_max_outgroup_size"])
    #out_distfn = splitterconf["_outgroup_dist"]
    # NOTE(review): this key differs from the "_min_outgroup_support" used
    # by the other selectors -- confirm which spelling the config provides.
    out_min_support = float(splitterconf["_outgroup_min_support"])

    if not target.up:
        raise TaskError(None, "Cannot select outgroups for the root node!")
    if not optimal_out_size:
        raise TaskError(None, "You are trying to set 0 outgroups!")

    # Gets a list of outside nodes and their distance to current target node
    n2targetdist = distance_matrix_new(target, leaf_only=False,
                                               topology_only=out_topodist)

    score = lambda _n: (_n.support,
                        1 - (abs(optimal_out_size - len(n2content[_n])) / float(max(optimal_out_size, len(n2content[_n])))), # outgroup size
                        1 - (n2targetdist[_n] / max_dist) #outgroup proximity to target
                        )

    def sort_outgroups(x, y):
        # Old-style cmp comparator comparing score tuples from their
        # smallest components upwards.
        # NOTE(review): converting the score tuple to a set drops ordering
        # and duplicate components; kept as-is to preserve behavior.
        score_x = set(score(x))
        score_y = set(score(y))
        while score_x:
            min_score_x = min(score_x)

            v = cmp(min_score_x, min(score_y))
            if v == 0:
                score_x.discard(min_score_x)
                score_y.discard(min_score_x)
            else:
                break
        # If still equal, sort by cladid to maintain reproducibility
        if v == 0:
            v = cmp(x.cladeid, y.cladeid)
        return v

    max_dist = max(n2targetdist.values())
    valid_nodes = [n for n in n2targetdist if \
                       not n2content[n] & n2content[target] and
                       n.support >= out_min_support]
    if not valid_nodes:
        raise TaskError(None, "Could not find a suitable outgroup (min_support=%s)"\
                      %out_min_support)
    # list.sort() accepts only the keyword "key" in Python 3; wrap the
    # cmp-style comparator with functools.cmp_to_key.
    valid_nodes.sort(key=cmp_to_key(sort_outgroups), reverse=True)
    best_outgroup = valid_nodes[0]
    seqs = [n.name for n in n2content[target]]
    outs = [n.name for n in n2content[best_outgroup]]

    log.log(20,
            "Found possible outgroup of size %s: score (support,size,dist)=%s",
            len(outs), score(best_outgroup))

    log.log(20, "Supports: %0.2f (children=%s)", best_outgroup.support,
            ','.join(["%0.2f" % ch.support for ch in
                      best_outgroup.children]))

    if DEBUG():
        # NOTE(review): NPR_TREE_STYLE and faces are not defined in this
        # module view; this debug path presumably relies on names imported
        # elsewhere -- confirm before enabling DEBUG.
        root = target.get_tree_root()
        for _seq in outs:
            tar =  root & _seq
            tar.img_style["fgcolor"]="green"
            tar.img_style["size"] = 12
            tar.img_style["shape"] = "circle"
        target.img_style["bgcolor"] = "lightblue"
        NPR_TREE_STYLE.title.clear()
        NPR_TREE_STYLE.title.add_face( faces.TextFace("MainTree:"
            " Outgroup selection is mark in green. Red=optimized nodes ",
            fgcolor="blue"), 0)
        root.show(tree_style=NPR_TREE_STYLE)
        for _n in root.traverse():
            _n.img_style = None

    return set(seqs), set(outs)
571
def distance_matrix_new(target, leaf_only=False, topology_only=False):
    """Return a dict mapping each tree node (except the temporary root)
    to its distance from *target*.

    The tree is temporarily re-rooted at *target* so distances can be
    accumulated in a single preorder pass per subtree; the original
    outgroup is restored before returning.

    :param leaf_only: accepted for API compatibility; not used here.
    :param topology_only: when True, count 1 per branch instead of branch
        lengths — the expression ``topology_only or n.dist`` evaluates to
        True (numerically 1) when the flag is set, and to ``n.dist``
        otherwise.
    """
    t = target.get_tree_root()
    real_outgroup = t.children[0]
    t.set_outgroup(target)

    # Distances within the target subtree.
    n2dist = {target:0}
    for n in target.get_descendants("preorder"):
        n2dist[n] = n2dist[n.up] + (topology_only or n.dist)

    # After re-rooting, the sister subtree covers the rest of the tree.
    sister = target.get_sisters()[0]
    n2dist[sister] = (topology_only or sister.dist)+ (topology_only or target.dist)
    for n in sister.get_descendants("preorder"):
        n2dist[n] = n2dist[n.up] + (topology_only or n.dist)

    # Restore the original rooting before handing the tree back.
    t.set_outgroup(real_outgroup)

    return n2dist
595
596
def assembly_tree(runid):
    """Rebuild the final tree of *runid* by merging every optimized
    subtree stored in the DB, in iteration order.

    :param runid: run identifier used to fetch task nodes from the DB.
    :returns: tuple (main_tree, iternumber) where iternumber is one past
        the last merged iteration.
    """
    task_nodes = db.get_runid_nodes(runid)
    task_nodes.reverse()

    main_tree = None
    iternumber = 1
    while task_nodes:
        cladeid, packtree, size = task_nodes.pop(-1)
        if not packtree:
            # Tasks without a stored tree are skipped (and do not consume
            # an iteration number).
            continue
        tree = db.decode(packtree)

        # Restore original gene names
        for leaf in tree.iter_leaves():
            leaf.add_features(safename=leaf.name)
            leaf.name = leaf.realname

        if main_tree:
            # substitute node in main tree by the optimized one
            target_node = main_tree.search_nodes(cladeid=cladeid)[0]
            target_node.up.add_child(tree)
            target_node.detach()
        else:
            main_tree = tree

        # (the unused "iter_name" local from the original was dropped)
        tree.add_features(iternumber=iternumber)
        iternumber += 1
    return main_tree, iternumber
627
628
def get_cmd_log(task):
    """Recursively collect [jobid, runtime, jobname, launch_cmd] rows for
    *task* and every job nested under it."""
    rows = []
    if getattr(task, 'get_launch_cmd', None):
        launch_cmd = task.get_launch_cmd()
        start, end = read_time_file(task.time_file)
        rows.append([task.jobid, sec2time(end - start), task.jobname, launch_cmd])
    for subtask in getattr(task, 'jobs', None) or []:
        rows.extend(get_cmd_log(subtask))
    return rows
639