1#!/usr/bin/env python
2
3#import built-in modules
4import os,sys
5import re
6import string
7from optparse import OptionParser
8import warnings
9import string
10import collections
11import math
12
13#import third-party modules
14
15__author__ = "Liguo Wang"
16__copyright__ = "Copyleft"
17__credits__ = []
18__license__ = "GPL"
19__version__="3.0.0"
20__maintainer__ = "Liguo Wang"
21__email__ = "wang.liguo@mayo.edu"
22__status__ = "Production"
23
24
25def RSS(arg):
26    '''calculate Square root of sum of square. Input is ',' separated numbers'''
27    lst=arg.split(',')
28    lst_sum=0
29    for i in [ int(i)**2 for i in lst]:
30        lst_sum += i
31    #nsr=10*math.log10((1+noi_sum**0.5)/(1+sig_sum**0.5))
32    return lst_sum**0.5
33
34def H_mean(arg):
35    '''calculate harmornic mean. Input is ',' separated numbers'''
36    lst=[1/float(i) for i in arg.split(',') if float(i) !=0]
37    if len(lst) == 0:
38        return "NA"
39    else:
40        return len(lst)/(sum(lst))
41
42def shannon_entropy(arg):
43    '''calculate shannon's entropy (or Shannon-Wiener index).'''
44    lst=arg
45    lst=[float(i) for i in lst if float(i)>0]
46    entropy=0.0
47    for i in lst:
48        entropy += (i/sum(lst)) * math.log((i/sum(lst)))
49    if entropy == 0:
50        return 0
51    else:
52        return -entropy
53
54
55def shannon_entropy_es(arg):
56    '''calculate estimator of shannon's entropy (Chao & Shen, 2003)'''
57    lst=arg
58    lst=[float(i) for i in lst if float(i)>0]
59    if sum(lst)<=0 or min(lst)<0:return "NA"    #if there is no fragmental splicing
60    if (len(lst)==1): return 0                  #if there is only 1 fragmental splicing
61    lst.append(2)
62
63    #estimate C_bar
64    singleton=0
65    entropy=0.0
66    for i in lst:
67        if i ==1:singleton +=1
68
69    C_bar = 1- (singleton/sum(lst))
70    for i in lst:entropy += ( (C_bar*i/sum(lst)) * math.log((C_bar*i/sum(lst))) )/(1-(1-C_bar*i/sum(lst))**sum(lst))
71    if entropy == 0:
72        return 0
73    else:
74        return -entropy
75
76def shannon_entropy_ht(arg):
77    '''calculate estimator of shannon's entropy based on Horzitz-Thompson'''
78    lst=arg.split(',')
79    lst=[float(i) for i in lst if float(i)>0]
80    if sum(lst)<=0 or min(lst)<0:return "NA"    #if there is no fragmental splicing
81    if (len(lst)==1): return 0                  #if there is only 1 fragmental splicing
82
83    #estimate C_bar
84    entropy=0.0
85    for i in lst:
86        entropy += ( (i/sum(lst)) * math.log((i/sum(lst))) )/(1-(1-i/sum(lst))**sum(lst))
87    return -entropy
88
89def simpson_index(arg):
90    '''calculate Gini-Simpson's index. Input is ',' separated numbers'''
91    lst=arg.split(',')
92    lst=[float(i) for i in lst if float(i)>0]
93    simpson=0.0
94
95    try:
96        for i in lst:
97            simpson = simpson + (i/sum(lst))**2
98        return 1-simpson
99    except: return 0
100
101def simpson_index_es(arg):
102    '''calculate estimator Gini-Simpson's index. Input is ',' separated numbers'''
103    lst=arg.split(',')
104    lst=[float(i) for i in lst if float(i)>0]
105    simpson=0.0
106
107    try:
108        for i in lst:
109            simpson = simpson + i*(i-1)
110        return 1- (simpson/(sum(lst)*(sum(lst)-1)))
111    except: return 0
112
113def Hill_number(arg,qvalue=1):
114    '''Calculate real diversity (Hill's number). Input is ',' separated numbers. qvalue is the only
115    parameter for Hill's function. When q=1, it return exp(H) which is the effective number of junctions
116    calculated by Shannon's entropy. When q<1, Hill's function was favors low frequency junctions.
117    When q>1, Hill's function was favors high frequency junctions (common junctions). Simpon's Index
118    is particular case of Hill's function as q=2'''
119
120    lst=arg.split(',')
121    lst=[float(i) for i in lst if float(i)>0]
122    freq=[(i/sum(lst))**qvalue for i in lst]
123    try:
124        return (sum(freq))**(1/(1-qvalue))
125    except:
126        return math.exp(shannon_entropy(arg))
127import math
128import functools
129
130def percentile(N, percent, key=lambda x:x):
131    """
132    Find the percentile of a list of values.
133
134    @parameter N - is a list of values. Note N MUST BE already sorted.
135    @parameter percent - a float value from 0 to 100.
136    @parameter key - optional key function to compute value from each element of N.
137
138    @return - the percentile of the values
139    """
140    if not N:
141        return None
142    k = (len(N)-1) * percent/100.0
143    f = math.floor(k)
144    c = math.ceil(k)
145    if f == c:
146        return key(N[int(k)])
147    d0 = key(N[int(f)]) * (c-k)
148    d1 = key(N[int(c)]) * (k-f)
149    return d0+d1
150
151def percentile_list(N):
152    """
153    Find the percentile of a list of values.
154    @parameter N - is a list of values. Note N MUST BE already sorted.
155    @return - the list of percentile of the values
156    """
157    if not N:return None
158    if len(N) <100: return N
159    per_list=[]
160    for i in range(1,101):
161        k = (len(N)-1) * i/100.0
162        f = math.floor(k)
163        c = math.ceil(k)
164        if f == c:
165            per_list.append( int(N[int(k)])  )
166        else:
167            d0 = N[int(f)] * (c-k)
168            d1 = N[int(c)] * (k-f)
169            per_list.append(int(round(d0+d1)))
170    return per_list
171