1package stats
2
3import (
4	"math"
5	"math/rand"
6	"strings"
7	"time"
8)
9
10// NormPpfRvs generates random variates using the Point Percentile Function.
11// For more information please visit: https://demonstrations.wolfram.com/TheMethodOfInverseTransforms/
12func NormPpfRvs(loc float64, scale float64, size int) []float64 {
13	rand.Seed(time.Now().UnixNano())
14	var toReturn []float64
15	for i := 0; i < size; i++ {
16		toReturn = append(toReturn, NormPpf(rand.Float64(), loc, scale))
17	}
18	return toReturn
19}
20
21// NormBoxMullerRvs generates random variates using the Box–Muller transform.
22// For more information please visit: http://mathworld.wolfram.com/Box-MullerTransformation.html
23func NormBoxMullerRvs(loc float64, scale float64, size int) []float64 {
24	rand.Seed(time.Now().UnixNano())
25	var toReturn []float64
26	for i := 0; i < int(float64(size/2)+float64(size%2)); i++ {
27		// u1 and u2 are uniformly distributed random numbers between 0 and 1.
28		u1 := rand.Float64()
29		u2 := rand.Float64()
30		// x1 and x2 are normally distributed random numbers.
31		x1 := loc + (scale * (math.Sqrt(-2*math.Log(u1)) * math.Cos(2*math.Pi*u2)))
32		toReturn = append(toReturn, x1)
33		if (i+1)*2 <= size {
34			x2 := loc + (scale * (math.Sqrt(-2*math.Log(u1)) * math.Sin(2*math.Pi*u2)))
35			toReturn = append(toReturn, x2)
36		}
37	}
38	return toReturn
39}
40
41// NormPdf is the probability density function.
42func NormPdf(x float64, loc float64, scale float64) float64 {
43	return (math.Pow(math.E, -(math.Pow(x-loc, 2))/(2*math.Pow(scale, 2)))) / (scale * math.Sqrt(2*math.Pi))
44}
45
46// NormLogPdf is the log of the probability density function.
47func NormLogPdf(x float64, loc float64, scale float64) float64 {
48	return math.Log((math.Pow(math.E, -(math.Pow(x-loc, 2))/(2*math.Pow(scale, 2)))) / (scale * math.Sqrt(2*math.Pi)))
49}
50
51// NormCdf is the cumulative distribution function.
52func NormCdf(x float64, loc float64, scale float64) float64 {
53	return 0.5 * (1 + math.Erf((x-loc)/(scale*math.Sqrt(2))))
54}
55
56// NormLogCdf is the log of the cumulative distribution function.
57func NormLogCdf(x float64, loc float64, scale float64) float64 {
58	return math.Log(0.5 * (1 + math.Erf((x-loc)/(scale*math.Sqrt(2)))))
59}
60
61// NormSf is the survival function (also defined as 1 - cdf, but sf is sometimes more accurate).
62func NormSf(x float64, loc float64, scale float64) float64 {
63	return 1 - 0.5*(1+math.Erf((x-loc)/(scale*math.Sqrt(2))))
64}
65
66// NormLogSf is the log of the survival function.
67func NormLogSf(x float64, loc float64, scale float64) float64 {
68	return math.Log(1 - 0.5*(1+math.Erf((x-loc)/(scale*math.Sqrt(2)))))
69}
70
71// NormPpf is the point percentile function.
72// This is based on Peter John Acklam's inverse normal CDF.
73// algorithm: http://home.online.no/~pjacklam/notes/invnorm/ (no longer visible).
74// For more information please visit: https://stackedboxes.org/2017/05/01/acklams-normal-quantile-function/
75func NormPpf(p float64, loc float64, scale float64) (x float64) {
76	const (
77		a1 = -3.969683028665376e+01
78		a2 = 2.209460984245205e+02
79		a3 = -2.759285104469687e+02
80		a4 = 1.383577518672690e+02
81		a5 = -3.066479806614716e+01
82		a6 = 2.506628277459239e+00
83
84		b1 = -5.447609879822406e+01
85		b2 = 1.615858368580409e+02
86		b3 = -1.556989798598866e+02
87		b4 = 6.680131188771972e+01
88		b5 = -1.328068155288572e+01
89
90		c1 = -7.784894002430293e-03
91		c2 = -3.223964580411365e-01
92		c3 = -2.400758277161838e+00
93		c4 = -2.549732539343734e+00
94		c5 = 4.374664141464968e+00
95		c6 = 2.938163982698783e+00
96
97		d1 = 7.784695709041462e-03
98		d2 = 3.224671290700398e-01
99		d3 = 2.445134137142996e+00
100		d4 = 3.754408661907416e+00
101
102		plow  = 0.02425
103		phigh = 1 - plow
104	)
105
106	if p < 0 || p > 1 {
107		return math.NaN()
108	} else if p == 0 {
109		return -math.Inf(0)
110	} else if p == 1 {
111		return math.Inf(0)
112	}
113
114	if p < plow {
115		q := math.Sqrt(-2 * math.Log(p))
116		x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
117			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
118	} else if phigh < p {
119		q := math.Sqrt(-2 * math.Log(1-p))
120		x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
121			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
122	} else {
123		q := p - 0.5
124		r := q * q
125		x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q /
126			(((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1)
127	}
128
129	e := 0.5*math.Erfc(-x/math.Sqrt2) - p
130	u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2)
131	x = x - u/(1+x*u/2)
132
133	return x*scale + loc
134}
135
136// NormIsf is the inverse survival function (inverse of sf).
137func NormIsf(p float64, loc float64, scale float64) (x float64) {
138	if -NormPpf(p, loc, scale) == 0 {
139		return 0
140	}
141	return -NormPpf(p, loc, scale)
142}
143
144// NormMoment approximates the non-central (raw) moment of order n.
145// For more information please visit: https://math.stackexchange.com/questions/1945448/methods-for-finding-raw-moments-of-the-normal-distribution
146func NormMoment(n int, loc float64, scale float64) float64 {
147	toReturn := 0.0
148	for i := 0; i < n+1; i++ {
149		if (n-i)%2 == 0 {
150			toReturn += float64(Ncr(n, i)) * (math.Pow(loc, float64(i))) * (math.Pow(scale, float64(n-i))) *
151				(float64(factorial(n-i)) / ((math.Pow(2.0, float64((n-i)/2))) *
152					float64(factorial((n-i)/2))))
153		}
154	}
155	return toReturn
156}
157
158// NormStats returns the mean, variance, skew, and/or kurtosis.
159// Mean(‘m’), variance(‘v’), skew(‘s’), and/or kurtosis(‘k’).
160// Takes string containing any of 'mvsk'.
161// Returns array of m v s k in that order.
162func NormStats(loc float64, scale float64, moments string) []float64 {
163	var toReturn []float64
164	if strings.ContainsAny(moments, "m") {
165		toReturn = append(toReturn, loc)
166	}
167	if strings.ContainsAny(moments, "v") {
168		toReturn = append(toReturn, math.Pow(scale, 2))
169	}
170	if strings.ContainsAny(moments, "s") {
171		toReturn = append(toReturn, 0.0)
172	}
173	if strings.ContainsAny(moments, "k") {
174		toReturn = append(toReturn, 0.0)
175	}
176	return toReturn
177}
178
179// NormEntropy is the differential entropy of the RV.
180func NormEntropy(loc float64, scale float64) float64 {
181	return math.Log(scale * math.Sqrt(2*math.Pi*math.E))
182}
183
184// NormFit returns the maximum likelihood estimators for the Normal Distribution.
185// Takes array of float64 values.
186// Returns array of Mean followed by Standard Deviation.
187func NormFit(data []float64) [2]float64 {
188	sum := 0.00
189	for i := 0; i < len(data); i++ {
190		sum += data[i]
191	}
192	mean := sum / float64(len(data))
193	stdNumerator := 0.00
194	for i := 0; i < len(data); i++ {
195		stdNumerator += math.Pow(data[i]-mean, 2)
196	}
197	return [2]float64{mean, math.Sqrt((stdNumerator) / (float64(len(data))))}
198}
199
200// NormMedian is the median of the distribution.
201func NormMedian(loc float64, scale float64) float64 {
202	return loc
203}
204
205// NormMean is the mean/expected value of the distribution.
206func NormMean(loc float64, scale float64) float64 {
207	return loc
208}
209
210// NormVar is the variance of the distribution.
211func NormVar(loc float64, scale float64) float64 {
212	return math.Pow(scale, 2)
213}
214
215// NormStd is the standard deviation of the distribution.
216func NormStd(loc float64, scale float64) float64 {
217	return scale
218}
219
220// NormInterval finds endpoints of the range that contains alpha percent of the distribution.
221func NormInterval(alpha float64, loc float64, scale float64) [2]float64 {
222	q1 := (1.0 - alpha) / 2
223	q2 := (1.0 + alpha) / 2
224	a := NormPpf(q1, loc, scale)
225	b := NormPpf(q2, loc, scale)
226	return [2]float64{a, b}
227}
228
229// factorial is the naive factorial algorithm.
230func factorial(x int) int {
231	if x == 0 {
232		return 1
233	}
234	return x * factorial(x-1)
235}
236
237// Ncr is an N choose R algorithm.
238// Aaron Cannon's algorithm.
239func Ncr(n, r int) int {
240	if n <= 1 || r == 0 || n == r {
241		return 1
242	}
243	if newR := n - r; newR < r {
244		r = newR
245	}
246	if r == 1 {
247		return n
248	}
249	ret := int(n - r + 1)
250	for i, j := ret+1, int(2); j <= r; i, j = i+1, j+1 {
251		ret = ret * i / j
252	}
253	return ret
254}
255