""" Orange data table and other data manipulation utilities """
import numbers
from types import SimpleNamespace
from typing import Tuple, Sequence
from functools import wraps

from Orange.data import Table, Variable
from Orange.widgets.widget import Msg, OWWidget
from Orange.widgets.utils.messages import UnboundMsg

# Unbound error messages; `check_table_annotation` attaches them to the
# widget's `Error` message group on every call of the wrapped method.
__missing_annotation = UnboundMsg('Missing annotation on gene IDs and organism in the input data.')
__missing_gene_id = UnboundMsg('Missing gene ID information. Make sure that Table is properly annotated.')
__missing_tax_id = UnboundMsg('Missing organism information. Make sure that Table is properly annotated.')
__unable_to_locate_genes = UnboundMsg('Unable to locate genes. Make sure that Table is properly annotated.')


class TableAnnotation(SimpleNamespace):
    """ Data Table hints: keys looked up in ``Table.attributes``. """

    # Organism in data table
    tax_id: str = 'taxonomy_id'

    # This indicates position of genes in data table
    gene_as_attr_name: str = 'gene_as_attribute_name'

    # This indicates a column name (if genes are in rows)
    gene_id_column: str = 'gene_id_column'

    # This indicates attribute name (if genes are in columns)
    gene_id_attribute: str = 'gene_id_attribute'


def check_table_annotation(f):
    """Wrapper for widget's input method that checks if the data on the input is correctly annotated.

    A widget in bioinformatics add-on expects that every Table has additional
    information stored as table attributes:
        - taxonomy_id = 'taxonomy id for given organism'
        - gene_as_attribute_name = 'location of gene names (rows/columns)'
        - gene_id_attribute/gene_id_column = 'attribute/column name'

    On every call the wrapper registers the four annotation error messages on
    ``widget.Error``, clears them, and then validates ``data.attributes``.
    If a check fails, the matching error is shown and ``None`` is passed to
    the wrapped method instead of the table. Input that is not a ``Table``
    (including ``None``) is forwarded unchanged without any checks.

    Parameters
    ----------
    f : callable
        Input handler with the signature ``f(widget, data, *args, **kwargs)``.

    Returns
    -------
    callable
        The wrapping function; it returns whatever ``f`` returns.
    """

    @wraps(f)
    def wrapper(widget, data: Table, *args, **kwargs):
        widget.Error.add_message('missing_annotation', __missing_annotation)
        widget.Error.add_message('missing_gene_id', __missing_gene_id)
        widget.Error.add_message('missing_tax_id', __missing_tax_id)
        widget.Error.add_message('unable_to_locate_genes', __unable_to_locate_genes)

        # Start from a clean state so errors from a previous input do not linger.
        widget.Error.missing_annotation.clear()
        widget.Error.missing_gene_id.clear()
        widget.Error.missing_tax_id.clear()
        widget.Error.unable_to_locate_genes.clear()

        # isinstance() is already False for None, no separate None check needed.
        if isinstance(data, Table):
            attributes: dict = data.attributes

            if not attributes:
                widget.Error.missing_annotation()
                data = None

            elif TableAnnotation.tax_id not in attributes:
                widget.Error.missing_tax_id()
                data = None

            elif TableAnnotation.gene_as_attr_name not in attributes:
                widget.Error.unable_to_locate_genes()
                data = None

            else:
                # Genes in columns require the attribute-name hint, genes in
                # rows require the column-name hint.
                if attributes[TableAnnotation.gene_as_attr_name]:
                    required_key: str = TableAnnotation.gene_id_attribute
                else:
                    required_key = TableAnnotation.gene_id_column

                if required_key not in attributes:
                    # NOTE(review): `missing_gene_id` is registered and cleared
                    # above but never raised; this case reports
                    # `unable_to_locate_genes` -- confirm that is intended.
                    widget.Error.unable_to_locate_genes()
                    data = None

        return f(widget, data, *args, **kwargs)

    return wrapper


# TODO: remove this and replace with TableAnnotation namespace
# Until then the legacy names are derived from TableAnnotation so that the
# two sets of keys cannot drift apart.

# species
TAX_ID = TableAnnotation.tax_id

# Will be set to True if gene names are represented as attribute names.
# If gene names are in rows, we set this value to False. (user must select proper column index)
GENE_AS_ATTRIBUTE_NAME = TableAnnotation.gene_as_attr_name

# Name of the column where rows are gene ids
GENE_ID_COLUMN = TableAnnotation.gene_id_column

# Name of the variable attribute that holds gene id
GENE_ID_ATTRIBUTE = TableAnnotation.gene_id_attribute

# Error strings
ERROR_ON_MISSING_ANNOTATION = 'Missing annotation on gene IDs and organism in the input data.'
ERROR_ON_MISSING_GENE_ID = 'Missing gene ID information in the input data'
ERROR_ON_MISSING_TAX_ID = 'Missing organism information in the input data'