math.stats: update math.stats using generics (#11482)
parent
30029eaf5d
commit
480fe8041a
|
@ -2,40 +2,16 @@ module stats
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
// TODO: Implement all of them with generics
|
|
||||||
|
|
||||||
// This module defines the following statistical operations on f64 array
|
|
||||||
// ---------------------------
|
|
||||||
// | Summary of Functions |
|
|
||||||
// ---------------------------
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
// freq - Frequency
|
|
||||||
// mean - Mean
|
|
||||||
// geometric_mean - Geometric Mean
|
|
||||||
// harmonic_mean - Harmonic Mean
|
|
||||||
// median - Median
|
|
||||||
// mode - Mode
|
|
||||||
// rms - Root Mean Square
|
|
||||||
// population_variance - Population Variance
|
|
||||||
// sample_variance - Sample Variance
|
|
||||||
// population_stddev - Population Standard Deviation
|
|
||||||
// sample_stddev - Sample Standard Deviation
|
|
||||||
// mean_absdev - Mean Absolute Deviation
|
|
||||||
// min - Minimum of the Array
|
|
||||||
// max - Maximum of the Array
|
|
||||||
// range - Range of the Array ( max - min )
|
|
||||||
// -----------------------------------------------------------------------
|
|
||||||
|
|
||||||
// Measure of Occurrence
|
// Measure of Occurrence
|
||||||
// Frequency of a given number
|
// Frequency of a given number
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/frequency-distribution.html
|
// https://www.mathsisfun.com/data/frequency-distribution.html
|
||||||
pub fn freq(arr []f64, val f64) int {
|
pub fn freq<T>(data []T, val T) int {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
mut count := 0
|
mut count := 0
|
||||||
for v in arr {
|
for v in data {
|
||||||
if v == val {
|
if v == val {
|
||||||
count++
|
count++
|
||||||
}
|
}
|
||||||
|
@ -47,60 +23,60 @@ pub fn freq(arr []f64, val f64) int {
|
||||||
// Mean of the given input array
|
// Mean of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/central-measures.html
|
// https://www.mathsisfun.com/data/central-measures.html
|
||||||
pub fn mean(arr []f64) f64 {
|
pub fn mean<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut sum := f64(0)
|
mut sum := T(0)
|
||||||
for v in arr {
|
for v in data {
|
||||||
sum += v
|
sum += v
|
||||||
}
|
}
|
||||||
return sum / f64(arr.len)
|
return sum / T(data.len)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Central Tendency
|
// Measure of Central Tendency
|
||||||
// Geometric Mean of the given input array
|
// Geometric Mean of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/numbers/geometric-mean.html
|
// https://www.mathsisfun.com/numbers/geometric-mean.html
|
||||||
pub fn geometric_mean(arr []f64) f64 {
|
pub fn geometric_mean<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut sum := f64(1)
|
mut sum := 1.0
|
||||||
for v in arr {
|
for v in data {
|
||||||
sum *= v
|
sum *= v
|
||||||
}
|
}
|
||||||
return math.pow(sum, f64(1) / arr.len)
|
return math.pow(sum, 1.0 / T(data.len))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Central Tendency
|
// Measure of Central Tendency
|
||||||
// Harmonic Mean of the given input array
|
// Harmonic Mean of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/numbers/harmonic-mean.html
|
// https://www.mathsisfun.com/numbers/harmonic-mean.html
|
||||||
pub fn harmonic_mean(arr []f64) f64 {
|
pub fn harmonic_mean<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut sum := f64(0)
|
mut sum := T(0)
|
||||||
for v in arr {
|
for v in data {
|
||||||
sum += f64(1) / v
|
sum += 1.0 / v
|
||||||
}
|
}
|
||||||
return f64(arr.len) / sum
|
return T(data.len) / sum
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Central Tendency
|
// Measure of Central Tendency
|
||||||
// Median of the given input array ( input array is assumed to be sorted )
|
// Median of the given input array ( input array is assumed to be sorted )
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/central-measures.html
|
// https://www.mathsisfun.com/data/central-measures.html
|
||||||
pub fn median(arr []f64) f64 {
|
pub fn median<T>(sorted_data []T) T {
|
||||||
if arr.len == 0 {
|
if sorted_data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
if arr.len % 2 == 0 {
|
if sorted_data.len % 2 == 0 {
|
||||||
mid := (arr.len / 2) - 1
|
mid := (sorted_data.len / 2) - 1
|
||||||
return (arr[mid] + arr[mid + 1]) / f64(2)
|
return (sorted_data[mid] + sorted_data[mid + 1]) / T(2)
|
||||||
} else {
|
} else {
|
||||||
return arr[((arr.len - 1) / 2)]
|
return sorted_data[((sorted_data.len - 1) / 2)]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,114 +84,198 @@ pub fn median(arr []f64) f64 {
|
||||||
// Mode of the given input array
|
// Mode of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/central-measures.html
|
// https://www.mathsisfun.com/data/central-measures.html
|
||||||
pub fn mode(arr []f64) f64 {
|
pub fn mode<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut freqs := []int{}
|
mut freqs := []int{}
|
||||||
for v in arr {
|
for v in data {
|
||||||
freqs << freq(arr, v)
|
freqs << freq(data, v)
|
||||||
}
|
}
|
||||||
mut max := 0
|
mut max := 0
|
||||||
for i in 0 .. freqs.len {
|
for i := 0; i < freqs.len; i++ {
|
||||||
if freqs[i] > freqs[max] {
|
if freqs[i] > freqs[max] {
|
||||||
max = i
|
max = i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return arr[max]
|
return data[max]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Root Mean Square of the given input array
|
// Root Mean Square of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://en.wikipedia.org/wiki/Root_mean_square
|
// https://en.wikipedia.org/wiki/Root_mean_square
|
||||||
pub fn rms(arr []f64) f64 {
|
pub fn rms<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut sum := f64(0)
|
mut sum := T(0)
|
||||||
for v in arr {
|
for v in data {
|
||||||
sum += math.pow(v, 2)
|
sum += math.pow(v, 2)
|
||||||
}
|
}
|
||||||
return math.sqrt(sum / f64(arr.len))
|
return math.sqrt(sum / T(data.len))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Population Variance of the given input array
|
// Population Variance of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/standard-deviation.html
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
pub fn population_variance(arr []f64) f64 {
|
[inline]
|
||||||
if arr.len == 0 {
|
pub fn population_variance<T>(data []T) T {
|
||||||
return f64(0)
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
m := mean(arr)
|
data_mean := mean<T>(data)
|
||||||
mut sum := f64(0)
|
return population_variance_mean<T>(data, data_mean)
|
||||||
for v in arr {
|
}
|
||||||
sum += math.pow(v - m, 2)
|
|
||||||
|
// Measure of Dispersion / Spread
|
||||||
|
// Population Variance of the given input array
|
||||||
|
// Based on
|
||||||
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
|
pub fn population_variance_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
return sum / f64(arr.len)
|
mut sum := T(0)
|
||||||
|
for v in data {
|
||||||
|
sum += (v - mean) * (v - mean)
|
||||||
|
}
|
||||||
|
return sum / T(data.len)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Sample Variance of the given input array
|
// Sample Variance of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/standard-deviation.html
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
pub fn sample_variance(arr []f64) f64 {
|
[inline]
|
||||||
if arr.len == 0 {
|
pub fn sample_variance<T>(data []T) T {
|
||||||
return f64(0)
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
m := mean(arr)
|
data_mean := mean<T>(data)
|
||||||
mut sum := f64(0)
|
return sample_variance_mean<T>(data, data_mean)
|
||||||
for v in arr {
|
}
|
||||||
sum += math.pow(v - m, 2)
|
|
||||||
|
// Measure of Dispersion / Spread
|
||||||
|
// Sample Variance of the given input array
|
||||||
|
// Based on
|
||||||
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
|
pub fn sample_variance_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
return sum / f64(arr.len - 1)
|
mut sum := T(0)
|
||||||
|
for v in data {
|
||||||
|
sum += (v - mean) * (v - mean)
|
||||||
|
}
|
||||||
|
return sum / T(data.len - 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Population Standard Deviation of the given input array
|
// Population Standard Deviation of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/standard-deviation.html
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
pub fn population_stddev(arr []f64) f64 {
|
[inline]
|
||||||
if arr.len == 0 {
|
pub fn population_stddev<T>(data []T) T {
|
||||||
return f64(0)
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
return math.sqrt(population_variance(arr))
|
return math.sqrt(population_variance<T>(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure of Dispersion / Spread
|
||||||
|
// Population Standard Deviation of the given input array
|
||||||
|
// Based on
|
||||||
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
|
[inline]
|
||||||
|
pub fn population_stddev_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
|
}
|
||||||
|
return T(math.sqrt(f64(population_variance_mean<T>(data, mean))))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Sample Standard Deviation of the given input array
|
// Sample Standard Deviation of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/standard-deviation.html
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
pub fn sample_stddev(arr []f64) f64 {
|
[inline]
|
||||||
if arr.len == 0 {
|
pub fn sample_stddev<T>(data []T) T {
|
||||||
return f64(0)
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
return math.sqrt(sample_variance(arr))
|
return T(math.sqrt(f64(sample_variance<T>(data))))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure of Dispersion / Spread
|
||||||
|
// Sample Standard Deviation of the given input array
|
||||||
|
// Based on
|
||||||
|
// https://www.mathsisfun.com/data/standard-deviation.html
|
||||||
|
[inline]
|
||||||
|
pub fn sample_stddev_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
|
}
|
||||||
|
return T(math.sqrt(f64(sample_variance_mean<T>(data, mean))))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Mean Absolute Deviation of the given input array
|
// Mean Absolute Deviation of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://en.wikipedia.org/wiki/Average_absolute_deviation
|
// https://en.wikipedia.org/wiki/Average_absolute_deviation
|
||||||
pub fn mean_absdev(arr []f64) f64 {
|
[inline]
|
||||||
if arr.len == 0 {
|
pub fn absdev<T>(data []T) T {
|
||||||
return f64(0)
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
amean := mean(arr)
|
data_mean := mean<T>(data)
|
||||||
mut sum := f64(0)
|
return absdev_mean<T>(data, data_mean)
|
||||||
for v in arr {
|
}
|
||||||
sum += math.abs(v - amean)
|
|
||||||
|
// Measure of Dispersion / Spread
|
||||||
|
// Mean Absolute Deviation of the given input array
|
||||||
|
// Based on
|
||||||
|
// https://en.wikipedia.org/wiki/Average_absolute_deviation
|
||||||
|
pub fn absdev_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
}
|
}
|
||||||
return sum / f64(arr.len)
|
mut sum := T(0)
|
||||||
|
for v in data {
|
||||||
|
sum += math.abs(v - mean)
|
||||||
|
}
|
||||||
|
return sum / T(data.len)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tss returns the total sum of squared deviations of `data` about its own
// arithmetic mean. An empty array yields T(0).
[inline]
pub fn tss<T>(data []T) T {
	if data.len == 0 {
		return T(0)
	}
	// Delegate to tss_mean, centred on the mean computed from the same data.
	return tss_mean<T>(data, mean<T>(data))
}
|
||||||
|
|
||||||
|
// Sum of squares about the mean
|
||||||
|
pub fn tss_mean<T>(data []T, mean T) T {
|
||||||
|
if data.len == 0 {
|
||||||
|
return T(0)
|
||||||
|
}
|
||||||
|
mut tss := T(0)
|
||||||
|
for v in data {
|
||||||
|
tss += (v - mean) * (v - mean)
|
||||||
|
}
|
||||||
|
return tss
|
||||||
}
|
}
|
||||||
|
|
||||||
// Minimum of the given input array
|
// Minimum of the given input array
|
||||||
pub fn min(arr []f64) f64 {
|
pub fn min<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut min := arr[0]
|
mut min := data[0]
|
||||||
for v in arr {
|
for v in data {
|
||||||
if v < min {
|
if v < min {
|
||||||
min = v
|
min = v
|
||||||
}
|
}
|
||||||
|
@ -224,12 +284,12 @@ pub fn min(arr []f64) f64 {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maximum of the given input array
|
// Maximum of the given input array
|
||||||
pub fn max(arr []f64) f64 {
|
pub fn max<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
}
|
}
|
||||||
mut max := arr[0]
|
mut max := data[0]
|
||||||
for v in arr {
|
for v in data {
|
||||||
if v > max {
|
if v > max {
|
||||||
max = v
|
max = v
|
||||||
}
|
}
|
||||||
|
@ -237,13 +297,188 @@ pub fn max(arr []f64) f64 {
|
||||||
return max
|
return max
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// minmax returns the smallest and the largest element of `data`, in that
// order, found in a single pass. An empty array yields (T(0), T(0)).
pub fn minmax<T>(data []T) (T, T) {
	if data.len == 0 {
		return T(0), T(0)
	}
	mut lo := data[0]
	mut hi := data[0]
	// The first element seeds both extremes, so scanning starts at index 1.
	for i := 1; i < data.len; i++ {
		elem := data[i]
		if elem < lo {
			lo = elem
		}
		if elem > hi {
			hi = elem
		}
	}
	return lo, hi
}
|
||||||
|
|
||||||
|
// min_index returns the position of the smallest element of `data`.
// When the minimum occurs more than once, the first position is returned.
// An empty array yields 0.
pub fn min_index<T>(data []T) int {
	if data.len == 0 {
		return 0
	}
	mut best := 0
	for i := 1; i < data.len; i++ {
		// Strict comparison keeps the earliest occurrence on ties.
		if data[i] < data[best] {
			best = i
		}
	}
	return best
}
|
||||||
|
|
||||||
|
// max_index returns the position of the largest element of `data`.
// When the maximum occurs more than once, the first position is returned.
// An empty array yields 0.
pub fn max_index<T>(data []T) int {
	if data.len == 0 {
		return 0
	}
	mut best := 0
	for i := 1; i < data.len; i++ {
		// Strict comparison keeps the earliest occurrence on ties.
		if data[i] > data[best] {
			best = i
		}
	}
	return best
}
|
||||||
|
|
||||||
|
// minmax_index returns the positions of the smallest and the largest element
// of `data`, in that order, found in a single pass. For repeated extremes the
// first position is returned. An empty array yields (0, 0).
pub fn minmax_index<T>(data []T) (int, int) {
	if data.len == 0 {
		return 0, 0
	}
	mut lo_at := 0
	mut hi_at := 0
	for i := 1; i < data.len; i++ {
		// Strict comparisons keep the earliest occurrence on ties.
		if data[i] > data[hi_at] {
			hi_at = i
		}
		if data[i] < data[lo_at] {
			lo_at = i
		}
	}
	return lo_at, hi_at
}
|
||||||
|
|
||||||
// Measure of Dispersion / Spread
|
// Measure of Dispersion / Spread
|
||||||
// Range ( Maximum - Minimum ) of the given input array
|
// Range ( Maximum - Minimum ) of the given input array
|
||||||
// Based on
|
// Based on
|
||||||
// https://www.mathsisfun.com/data/range.html
|
// https://www.mathsisfun.com/data/range.html
|
||||||
pub fn range(arr []f64) f64 {
|
pub fn range<T>(data []T) T {
|
||||||
if arr.len == 0 {
|
if data.len == 0 {
|
||||||
return f64(0)
|
return T(0)
|
||||||
|
}
|
||||||
|
min, max := minmax<T>(data)
|
||||||
|
return max - min
|
||||||
|
}
|
||||||
|
|
||||||
|
// covariance returns the covariance of `data1` and `data2`, centring each
// array on its own arithmetic mean.
[inline]
pub fn covariance<T>(data1 []T, data2 []T) T {
	return covariance_mean<T>(data1, data2, mean<T>(data1), mean<T>(data2))
}
|
||||||
|
|
||||||
|
// covariance_mean returns the covariance of `data1` and `data2` about the
// supplied means `mean1` and `mean2`: the average of the products of the
// paired deviations. Only the first min(data1.len, data2.len) pairs are
// used; if that count is zero the result is T(0).
pub fn covariance_mean<T>(data1 []T, data2 []T, mean1 T, mean2 T) T {
	pairs := if data1.len < data2.len { data1.len } else { data2.len }
	if pairs == 0 {
		return T(0)
	}
	// Running average updated pair by pair, so no large intermediate sum is
	// ever formed.
	mut acc := T(0)
	for i := 0; i < pairs; i++ {
		d1 := data1[i] - mean1
		d2 := data2[i] - mean2
		acc += (d1 * d2 - acc) / (T(i) + 1.0)
	}
	return acc
}
|
||||||
|
|
||||||
|
// lag1_autocorrelation returns the lag-1 autocorrelation of `data`, centred
// on the arithmetic mean of `data` itself.
[inline]
pub fn lag1_autocorrelation<T>(data []T) T {
	return lag1_autocorrelation_mean<T>(data, mean<T>(data))
}
|
||||||
|
|
||||||
|
// lag1_autocorrelation_mean returns the lag-1 autocorrelation of `data`
// about the supplied `mean`: the running average of the products of
// consecutive deviations divided by the running average of the squared
// deviations. An empty array yields T(0).
//
// NOTE: when every deviation is zero (for example a single element equal to
// `mean`) the denominator is zero and the division is left unguarded.
pub fn lag1_autocorrelation_mean<T>(data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut q := T(0)
	// Seed the variance accumulator with the squared deviation of the first
	// element. The previous seed, (data[0] * mean) - (data[0] * mean), was
	// always zero and silently dropped that term from the denominator.
	mut v := (data[0] - mean) * (data[0] - mean)
	for i := 1; i < data.len; i++ {
		delta0 := data[i - 1] - mean
		delta1 := data[i] - mean
		// Recurrence form of the two averages, so no large intermediate sums
		// are ever formed.
		q += (delta0 * delta1 - q) / (T(i) + 1.0)
		v += (delta1 * delta1 - v) / (T(i) + 1.0)
	}
	return q / v
}
|
||||||
|
|
||||||
|
// kurtosis returns the excess kurtosis of `data`, using the arithmetic mean
// and the population standard deviation computed from `data` itself.
[inline]
pub fn kurtosis<T>(data []T) T {
	centre := mean<T>(data)
	spread := population_stddev_mean<T>(data, centre)
	return kurtosis_mean_stddev<T>(data, centre, spread)
}
|
||||||
|
|
||||||
|
// kurtosis_mean_stddev returns the excess kurtosis of `data` for the given
// `mean` and standard deviation `sd`: the average of the fourth powers of
// the deviations normalized by `sd`, minus 3.
pub fn kurtosis_mean_stddev<T>(data []T, mean T, sd T) T {
	// Running average updated element by element, so no large intermediate
	// sum that could overflow is ever formed.
	mut fourth_moment := T(0)
	for i := 0; i < data.len; i++ {
		z := (data[i] - mean) / sd
		fourth_moment += (z * z * z * z - fourth_moment) / (T(i) + 1.0)
	}
	return fourth_moment - T(3.0)
}
|
||||||
|
|
||||||
|
// skew returns the skewness of `data`, using the arithmetic mean and the
// population standard deviation computed from `data` itself.
[inline]
pub fn skew<T>(data []T) T {
	centre := mean<T>(data)
	spread := population_stddev_mean<T>(data, centre)
	return skew_mean_stddev<T>(data, centre, spread)
}
|
||||||
|
|
||||||
|
// skew_mean_stddev returns the skewness of `data` for the given `mean` and
// standard deviation `sd`: the average of the cubed deviations normalized
// by `sd`. An empty array yields T(0).
pub fn skew_mean_stddev<T>(data []T, mean T, sd T) T {
	// Running average updated element by element, so no large intermediate
	// sum that could overflow is ever formed.
	mut third_moment := T(0)
	for i := 0; i < data.len; i++ {
		z := (data[i] - mean) / sd
		third_moment += (z * z * z - third_moment) / (T(i) + 1.0)
	}
	return third_moment
}
|
||||||
|
|
||||||
|
pub fn quantile<T>(sorted_data []T, f T) T {
|
||||||
|
if sorted_data.len == 0 {
|
||||||
|
return T(0)
|
||||||
|
}
|
||||||
|
index := f * (T(sorted_data.len) - 1.0)
|
||||||
|
lhs := int(index)
|
||||||
|
delta := index - T(lhs)
|
||||||
|
return if lhs == sorted_data.len - 1 {
|
||||||
|
sorted_data[lhs]
|
||||||
|
} else {
|
||||||
|
(1.0 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)]
|
||||||
}
|
}
|
||||||
return max(arr) - min(arr)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import math.stats
|
|
||||||
import math
|
import math
|
||||||
|
import math.stats
|
||||||
|
|
||||||
fn test_freq() {
|
fn test_freq() {
|
||||||
// Tests were also verified on Wolfram Alpha
|
// Tests were also verified on Wolfram Alpha
|
||||||
|
@ -44,8 +44,9 @@ fn test_geometric_mean() {
|
||||||
data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
|
data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
|
||||||
o = stats.geometric_mean(data)
|
o = stats.geometric_mean(data)
|
||||||
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
||||||
assert o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0)
|
ok := o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0)
|
||||||
|| o.str() == '-nan(ind)' // Because in math it yields a complex number
|
|| o.str() == '-nan(ind)'
|
||||||
|
assert ok // Because in math it yields a complex number
|
||||||
data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
|
data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
|
||||||
o = stats.geometric_mean(data)
|
o = stats.geometric_mean(data)
|
||||||
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
||||||
|
@ -194,18 +195,18 @@ fn test_sample_stddev() {
|
||||||
assert tst_res(o.str(), '33.263639')
|
assert tst_res(o.str(), '33.263639')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_mean_absdev() {
|
fn test_absdev() {
|
||||||
// Tests were also verified on Wolfram Alpha
|
// Tests were also verified on Wolfram Alpha
|
||||||
mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
|
mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
|
||||||
mut o := stats.mean_absdev(data)
|
mut o := stats.absdev(data)
|
||||||
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
||||||
assert tst_res(o.str(), '2.187500')
|
assert tst_res(o.str(), '2.187500')
|
||||||
data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
|
data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
|
||||||
o = stats.mean_absdev(data)
|
o = stats.absdev(data)
|
||||||
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
||||||
assert tst_res(o.str(), '24.830000')
|
assert tst_res(o.str(), '24.830000')
|
||||||
data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
|
data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
|
||||||
o = stats.mean_absdev(data)
|
o = stats.absdev(data)
|
||||||
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
// Some issue with precision comparison in f64 using == operator hence serializing to string
|
||||||
assert tst_res(o.str(), '27.768000')
|
assert tst_res(o.str(), '27.768000')
|
||||||
}
|
}
|
||||||
|
@ -262,7 +263,7 @@ fn test_passing_empty() {
|
||||||
assert stats.sample_variance(data) == f64(0)
|
assert stats.sample_variance(data) == f64(0)
|
||||||
assert stats.population_stddev(data) == f64(0)
|
assert stats.population_stddev(data) == f64(0)
|
||||||
assert stats.sample_stddev(data) == f64(0)
|
assert stats.sample_stddev(data) == f64(0)
|
||||||
assert stats.mean_absdev(data) == f64(0)
|
assert stats.absdev(data) == f64(0)
|
||||||
assert stats.min(data) == f64(0)
|
assert stats.min(data) == f64(0)
|
||||||
assert stats.max(data) == f64(0)
|
assert stats.max(data) == f64(0)
|
||||||
assert stats.range(data) == f64(0)
|
assert stats.range(data) == f64(0)
|
||||||
|
|
Loading…
Reference in New Issue