1""" Orange data table and other data manipulation utilities """
2import numbers
3from types import SimpleNamespace
4from typing import Tuple, Sequence
5from functools import wraps
6
7from Orange.data import Table, Variable
8from Orange.widgets.widget import Msg, OWWidget
9from Orange.widgets.utils.messages import UnboundMsg
10
# Unbound error messages. ``check_table_annotation`` registers them on the
# widget's ``Error`` message group (via ``add_message``) every time the wrapped
# input handler runs.
__missing_annotation = UnboundMsg(
    'Missing annotation on gene IDs and organism in the input data.'
)
__missing_gene_id = UnboundMsg(
    'Missing gene ID information. Make sure that Table is properly annotated.'
)
__missing_tax_id = UnboundMsg(
    'Missing organism information. Make sure that Table is properly annotated.'
)
__unable_to_locate_genes = UnboundMsg(
    'Unable to locate genes. Make sure that Table is properly annotated.'
)
15
16
class TableAnnotation(SimpleNamespace):
    """Names of the table-attribute keys that describe an annotated data table.

    Each class attribute holds the string key under which the corresponding
    hint is stored; ``check_table_annotation`` looks these keys up in
    ``data.attributes``.
    """

    # Key of the organism (taxonomy id) the data belongs to.
    tax_id: str = 'taxonomy_id'

    # Key of the flag that tells where the genes are located in the table.
    gene_as_attr_name: str = 'gene_as_attribute_name'

    # Key of the column name holding gene ids (used when genes are in rows).
    gene_id_column: str = 'gene_id_column'

    # Key of the attribute name holding gene ids (used when genes are in columns).
    gene_id_attribute: str = 'gene_id_attribute'
31
32
def _annotation_problem(attributes):
    """Return the name of the widget error matching what ``attributes`` lacks.

    The checks run in a fixed order and the first failing one wins:

    1. ``attributes`` is empty                      -> ``'missing_annotation'``
    2. no taxonomy id key                           -> ``'missing_tax_id'``
    3. no "gene as attribute name" key              -> ``'unable_to_locate_genes'``
    4. the gene id key required by that flag is missing
                                                    -> ``'unable_to_locate_genes'``

    Returns ``None`` when none of the checks fails.
    """
    if not attributes:
        return 'missing_annotation'

    if TableAnnotation.tax_id not in attributes:
        return 'missing_tax_id'

    if TableAnnotation.gene_as_attr_name not in attributes:
        return 'unable_to_locate_genes'

    # A truthy flag requires the gene id *attribute* key, a falsy one requires
    # the gene id *column* key.
    if attributes[TableAnnotation.gene_as_attr_name]:
        required_key = TableAnnotation.gene_id_attribute
    else:
        required_key = TableAnnotation.gene_id_column

    if required_key not in attributes:
        return 'unable_to_locate_genes'

    return None


def check_table_annotation(f):
    """Decorate a widget's input method so it only receives annotated tables.

    A widget in bioinformatics add-on expects that every Table has additional
    information stored as table attributes:
       - taxonomy_id = 'taxonomy id for given organism'
       - gene_as_attribute_name = 'location of gene names (rows/columns)'
       - gene_id_attribute/gene_id_column = 'attribute/column name'

    On every call the wrapper registers the annotation error messages on
    ``widget.Error`` and clears them. If ``data`` is a ``Table`` whose
    attributes fail one of the checks, the matching error is raised on the
    widget and ``None`` is passed to ``f`` in place of the table. Any other
    value (including ``None``) is forwarded to ``f`` unchanged.

    Returns the wrapped function; its return value is whatever ``f`` returns.
    """

    @wraps(f)
    def wrapper(widget, data: Table, *args, **kwargs):
        # NOTE(review): 'missing_gene_id' is registered and cleared here but no
        # check in this wrapper ever raises it - confirm whether that is intended.
        messages = (
            ('missing_annotation', __missing_annotation),
            ('missing_gene_id', __missing_gene_id),
            ('missing_tax_id', __missing_tax_id),
            ('unable_to_locate_genes', __unable_to_locate_genes),
        )

        # Register all messages first, then reset them, so that errors shown by
        # a previous input do not linger.
        for name, message in messages:
            widget.Error.add_message(name, message)
        for name, _ in messages:
            getattr(widget.Error, name).clear()

        if isinstance(data, Table):
            problem = _annotation_problem(data.attributes)
            if problem is not None:
                getattr(widget.Error, problem)()
                # The wrapped method must not see an improperly annotated table.
                data = None

        return f(widget, data, *args, **kwargs)

    return wrapper
87
88
# TODO: remove the constants below and use the TableAnnotation namespace instead.

# Table attribute key: species (taxonomy id).
TAX_ID = 'taxonomy_id'

# Table attribute key: flag that is set to True if gene names are represented
# as attribute names and to False if gene names are in rows (the user must
# then select the proper column index).
GENE_AS_ATTRIBUTE_NAME = 'gene_as_attribute_name'

# Table attribute key: name of the column whose rows are gene ids.
GENE_ID_COLUMN = 'gene_id_column'

# Table attribute key: name of the variable attribute that holds the gene id.
GENE_ID_ATTRIBUTE = 'gene_id_attribute'

# Error message strings.
ERROR_ON_MISSING_ANNOTATION = 'Missing annotation on gene IDs and organism in the input data.'
ERROR_ON_MISSING_GENE_ID = 'Missing gene ID information in the input data'
ERROR_ON_MISSING_TAX_ID = 'Missing organism information in the input data'
108