import numpy as np
from scipy.stats import zscore, rankdata
from sklearn.preprocessing import quantile_transform

from Orange.data.table import Table
from Orange.preprocess.preprocess import Preprocess


class LogarithmicScale(Preprocess):
    """
    Apply the transformation log2(x + 1) to every value of the data matrix.
    """

    def __call__(self, data) -> Table:
        """Return a copy of ``data`` whose ``X`` holds ``log2(data.X + 1)``."""
        scaled = data.copy()
        # Shift by one before taking the logarithm so that zeros map to zero.
        shifted = data.X + 1
        scaled.X = np.log2(shifted)
        return scaled


class ZScore(Preprocess):
    """
    Standardize the data matrix with the z score.

    Detailed description: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.zscore.html
    """

    def __init__(self, axis=0):
        # Axis of ``X`` along which scipy computes the mean and deviation.
        self.axis = axis

    def __call__(self, data) -> Table:
        """
        Return a copy of ``data`` whose ``X`` holds the z scores of ``data.X``
        computed along ``self.axis``; NaN results are replaced with 0.
        """
        standardized = data.copy()
        standardized.X = zscore(data.X, axis=self.axis)
        # zscore can yield NaN (presumably for slices without variance);
        # those entries are reported as 0 instead.
        undefined = np.isnan(standardized.X)
        standardized.X[undefined] = 0
        return standardized


class QuantileTransform(Preprocess):
    """
    Map the data matrix onto a uniform or a normal distribution
    using quantile information.

    Detailed description: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.quantile_transform.html
    """

    def __init__(self, axis=0, n_quantiles=1000, output_distribution='uniform'):
        # All three values are forwarded unchanged to sklearn's
        # ``quantile_transform`` when the preprocessor is called.
        self.axis = axis
        self.n_quantiles = n_quantiles
        self.output_distribution = output_distribution

    def __call__(self, data) -> Table:
        """
        Return a copy of ``data`` whose ``X`` is the output of
        ``quantile_transform`` run with the configured settings.
        """
        transformed = data.copy()
        options = {
            'n_quantiles': self.n_quantiles,
            'output_distribution': self.output_distribution,
            'copy': True,
            'axis': self.axis,
        }
        transformed.X = quantile_transform(transformed.X, **options)
        return transformed


class QuantileNormalization(Preprocess):
    """
    Quantile normalization: every row of the data matrix is mapped onto a
    common reference distribution, obtained by averaging the values found
    at each rank position across all rows.

    Detailed description: https://en.wikipedia.org/wiki/Quantile_normalization
    """

    def __call__(self, data) -> Table:
        """
        Return a copy of ``data`` in which each value of ``X`` is replaced by
        the reference value belonging to its rank within its row.

        NOTE(review): missing values (NaN) get no special treatment here;
        confirm that inputs are complete.
        """
        normalized = data.copy()
        values = normalized.X

        # Reference distribution: sort each row, then average the sorted
        # rows position by position (one reference value per rank).
        ordered = np.sort(values, axis=1)
        reference = np.mean(ordered, axis=0)

        # Zero-based rank of every value within its row; tied values share
        # the average of their ranks, which may be fractional.
        position = rankdata(values, method='average', axis=1) - 1

        # A fractional rank lies between two reference entries, so the
        # entries at the truncated and the rounded-up position are averaged.
        # For whole-number ranks both lookups hit the same entry.
        lower = position.astype(int)
        upper = np.ceil(position).astype(int)
        below = np.take(reference, lower)
        above = np.take(reference, upper)
        normalized.X = (below + above) / 2

        return normalized