1#!/usr/bin/env python 2 3#import built-in modules 4import os,sys 5import re 6import string 7from optparse import OptionParser 8import warnings 9import string 10import collections 11import math 12 13#import third-party modules 14 15__author__ = "Liguo Wang" 16__copyright__ = "Copyleft" 17__credits__ = [] 18__license__ = "GPL" 19__version__="3.0.0" 20__maintainer__ = "Liguo Wang" 21__email__ = "wang.liguo@mayo.edu" 22__status__ = "Production" 23 24 25def RSS(arg): 26 '''calculate Square root of sum of square. Input is ',' separated numbers''' 27 lst=arg.split(',') 28 lst_sum=0 29 for i in [ int(i)**2 for i in lst]: 30 lst_sum += i 31 #nsr=10*math.log10((1+noi_sum**0.5)/(1+sig_sum**0.5)) 32 return lst_sum**0.5 33 34def H_mean(arg): 35 '''calculate harmornic mean. Input is ',' separated numbers''' 36 lst=[1/float(i) for i in arg.split(',') if float(i) !=0] 37 if len(lst) == 0: 38 return "NA" 39 else: 40 return len(lst)/(sum(lst)) 41 42def shannon_entropy(arg): 43 '''calculate shannon's entropy (or Shannon-Wiener index).''' 44 lst=arg 45 lst=[float(i) for i in lst if float(i)>0] 46 entropy=0.0 47 for i in lst: 48 entropy += (i/sum(lst)) * math.log((i/sum(lst))) 49 if entropy == 0: 50 return 0 51 else: 52 return -entropy 53 54 55def shannon_entropy_es(arg): 56 '''calculate estimator of shannon's entropy (Chao & Shen, 2003)''' 57 lst=arg 58 lst=[float(i) for i in lst if float(i)>0] 59 if sum(lst)<=0 or min(lst)<0:return "NA" #if there is no fragmental splicing 60 if (len(lst)==1): return 0 #if there is only 1 fragmental splicing 61 lst.append(2) 62 63 #estimate C_bar 64 singleton=0 65 entropy=0.0 66 for i in lst: 67 if i ==1:singleton +=1 68 69 C_bar = 1- (singleton/sum(lst)) 70 for i in lst:entropy += ( (C_bar*i/sum(lst)) * math.log((C_bar*i/sum(lst))) )/(1-(1-C_bar*i/sum(lst))**sum(lst)) 71 if entropy == 0: 72 return 0 73 else: 74 return -entropy 75 76def shannon_entropy_ht(arg): 77 '''calculate estimator of shannon's entropy based on Horzitz-Thompson''' 78 lst=arg.split(',') 79 lst=[float(i) for i in lst if float(i)>0] 80 if sum(lst)<=0 or min(lst)<0:return "NA" #if there is no fragmental splicing 81 if (len(lst)==1): return 0 #if there is only 1 fragmental splicing 82 83 #estimate C_bar 84 entropy=0.0 85 for i in lst: 86 entropy += ( (i/sum(lst)) * math.log((i/sum(lst))) )/(1-(1-i/sum(lst))**sum(lst)) 87 return -entropy 88 89def simpson_index(arg): 90 '''calculate Gini-Simpson's index. Input is ',' separated numbers''' 91 lst=arg.split(',') 92 lst=[float(i) for i in lst if float(i)>0] 93 simpson=0.0 94 95 try: 96 for i in lst: 97 simpson = simpson + (i/sum(lst))**2 98 return 1-simpson 99 except: return 0 100 101def simpson_index_es(arg): 102 '''calculate estimator Gini-Simpson's index. Input is ',' separated numbers''' 103 lst=arg.split(',') 104 lst=[float(i) for i in lst if float(i)>0] 105 simpson=0.0 106 107 try: 108 for i in lst: 109 simpson = simpson + i*(i-1) 110 return 1- (simpson/(sum(lst)*(sum(lst)-1))) 111 except: return 0 112 113def Hill_number(arg,qvalue=1): 114 '''Calculate real diversity (Hill's number). Input is ',' separated numbers. qvalue is the only 115 parameter for Hill's function. When q=1, it return exp(H) which is the effective number of junctions 116 calculated by Shannon's entropy. When q<1, Hill's function was favors low frequency junctions. 117 When q>1, Hill's function was favors high frequency junctions (common junctions). Simpon's Index 118 is particular case of Hill's function as q=2''' 119 120 lst=arg.split(',') 121 lst=[float(i) for i in lst if float(i)>0] 122 freq=[(i/sum(lst))**qvalue for i in lst] 123 try: 124 return (sum(freq))**(1/(1-qvalue)) 125 except: 126 return math.exp(shannon_entropy(arg)) 127import math 128import functools 129 130def percentile(N, percent, key=lambda x:x): 131 """ 132 Find the percentile of a list of values. 133 134 @parameter N - is a list of values. Note N MUST BE already sorted. 135 @parameter percent - a float value from 0 to 100. 136 @parameter key - optional key function to compute value from each element of N. 137 138 @return - the percentile of the values 139 """ 140 if not N: 141 return None 142 k = (len(N)-1) * percent/100.0 143 f = math.floor(k) 144 c = math.ceil(k) 145 if f == c: 146 return key(N[int(k)]) 147 d0 = key(N[int(f)]) * (c-k) 148 d1 = key(N[int(c)]) * (k-f) 149 return d0+d1 150 151def percentile_list(N): 152 """ 153 Find the percentile of a list of values. 154 @parameter N - is a list of values. Note N MUST BE already sorted. 155 @return - the list of percentile of the values 156 """ 157 if not N:return None 158 if len(N) <100: return N 159 per_list=[] 160 for i in range(1,101): 161 k = (len(N)-1) * i/100.0 162 f = math.floor(k) 163 c = math.ceil(k) 164 if f == c: 165 per_list.append( int(N[int(k)]) ) 166 else: 167 d0 = N[int(f)] * (c-k) 168 d1 = N[int(c)] * (k-f) 169 per_list.append(int(round(d0+d1))) 170 return per_list 171