diff --git a/vlib/math/stats/stats.v b/vlib/math/stats/stats.v
new file mode 100644
index 0000000000..0023189c80
--- /dev/null
+++ b/vlib/math/stats/stats.v
@@ -0,0 +1,251 @@
+module stats
+
+import math
+
+// This module defines the following statistical operations on f64 arrays
+//  ---------------------------
+// |   Summary of Functions    |
+//  ---------------------------
+// -----------------------------------------------------------------------
+// freq                - Frequency
+// mean                - Mean
+// geometric_mean      - Geometric Mean
+// harmonic_mean       - Harmonic Mean
+// median              - Median
+// mode                - Mode
+// rms                 - Root Mean Square
+// population_variance - Population Variance
+// sample_variance     - Sample Variance
+// population_stddev   - Population Standard Deviation
+// sample_stddev       - Sample Standard Deviation
+// mean_absdev         - Mean Absolute Deviation
+// min                 - Minimum of the Array
+// max                 - Maximum of the Array
+// range               - Range of the Array ( max - min )
+// -----------------------------------------------------------------------
+
+
+// Measure of Occurrence
+// freq returns how many elements of arr are exactly equal to val ( 0 for an empty array )
+// Based on
+// https://www.mathsisfun.com/data/frequency-distribution.html
+pub fn freq(arr []f64, val f64) int {
+	if arr.len == 0 {
+		return 0
+	}
+	mut count := 0
+	for v in arr {
+		if v == val {
+			count++
+		}
+	}
+	return count
+}
+
+// Measure of Central Tendency
+// mean returns the arithmetic mean ( sum / len ) of the given input array, 0 when it is empty
+// Based on
+// https://www.mathsisfun.com/data/central-measures.html
+pub fn mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut sum := f64(0)
+	for v in arr {
+		sum += v
+	}
+	return sum/f64(arr.len)
+}
+
+// Measure of Central Tendency
+// geometric_mean returns the len-th root of the product of the input array, 0 when it is empty
+// Based on
+// https://www.mathsisfun.com/numbers/geometric-mean.html
+pub fn geometric_mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut sum := f64(1)
+	for v in arr {
+		sum *= v
+	}
+	return math.pow(sum,f64(1)/f64(arr.len))
+}
+
+// Measure of Central Tendency
+// harmonic_mean returns len divided by the sum of reciprocals of the input array, 0 when it is empty
+// Based on
+// https://www.mathsisfun.com/numbers/harmonic-mean.html
+pub fn harmonic_mean(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut sum := f64(0)
+	for v in arr {
+		sum += f64(1)/v
+	}
+	return f64(arr.len)/sum
+}
+
+// Measure of Central Tendency
+// median returns the middle value of the given input array ( input array is assumed to be sorted )
+// Based on
+// https://www.mathsisfun.com/data/central-measures.html
+pub fn median(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	if arr.len % 2 == 0 {
+		mid := (arr.len/2)-1
+		return (arr[mid] + arr[mid+1])/f64(2)
+	}
+	else {
+		return arr[((arr.len-1)/2)]
+	}
+}
+
+// Measure of Central Tendency
+// mode returns the most frequent value of the input array ( on a tie the value seen first wins )
+// Based on
+// https://www.mathsisfun.com/data/central-measures.html
+pub fn mode(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut freqs := []int
+	for v in arr {
+		freqs << freq(arr,v) // freqs[k] is the occurrence count of arr[k]
+	}
+	mut i := 0
+	mut max := 0
+	for i < freqs.len {
+		if freqs[i] > freqs[max] { // strict '>' keeps the earliest index on ties
+			max = i
+		}
+		i++
+	}
+	return arr[max]
+}
+
+// rms returns the Root Mean Square ( sqrt of the mean of squares ) of the input array, 0 when empty
+// Based on
+// https://en.wikipedia.org/wiki/Root_mean_square
+pub fn rms(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut sum := f64(0)
+	for v in arr {
+		sum += math.pow(v,2)
+	}
+	return math.sqrt(sum/f64(arr.len))
+}
+
+// Measure of Dispersion / Spread
+// population_variance returns the mean squared deviation from the mean ( divides by len )
+// Based on
+// https://www.mathsisfun.com/data/standard-deviation.html
+pub fn population_variance(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	m := mean(arr)
+	mut sum := f64(0)
+	for v in arr {
+		sum += math.pow(v-m,2)
+	}
+	return sum/f64(arr.len)
+}
+
+// Measure of Dispersion / Spread
+// sample_variance returns the squared deviation from the mean divided by len-1 ( needs len >= 2 )
+// Based on
+// https://www.mathsisfun.com/data/standard-deviation.html
+pub fn sample_variance(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	m := mean(arr)
+	mut sum := f64(0)
+	for v in arr {
+		sum += math.pow(v-m,2)
+	}
+	return sum/f64(arr.len-1)
+}
+
+// Measure of Dispersion / Spread
+// population_stddev returns the square root of population_variance of the given input array
+// Based on
+// https://www.mathsisfun.com/data/standard-deviation.html
+pub fn population_stddev(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	return math.sqrt(population_variance(arr))
+}
+
+// Measure of Dispersion / Spread
+// sample_stddev returns the square root of sample_variance of the given input array
+// Based on
+// https://www.mathsisfun.com/data/standard-deviation.html
+pub fn sample_stddev(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	return math.sqrt(sample_variance(arr))
+}
+
+// Measure of Dispersion / Spread
+// mean_absdev returns the mean of the absolute deviations from the mean of the input array
+// Based on
+// https://en.wikipedia.org/wiki/Average_absolute_deviation
+pub fn mean_absdev(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	m := mean(arr)
+	mut sum := f64(0)
+	for v in arr {
+		sum += math.abs(v-m)
+	}
+	return sum/f64(arr.len)
+}
+
+// min returns the smallest value of the given input array, 0 when it is empty
+pub fn min(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut min := arr[0]
+	for v in arr {
+		if v < min {
+			min = v
+		}
+	}
+	return min
+}
+
+// max returns the largest value of the given input array, 0 when it is empty
+pub fn max(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	mut max := arr[0]
+	for v in arr {
+		if v > max {
+			max = v
+		}
+	}
+	return max
+}
+
+// Measure of Dispersion / Spread
+// range returns ( Maximum - Minimum ) of the given input array, 0 when it is empty
+// Based on
+// https://www.mathsisfun.com/data/range.html
+pub fn range(arr []f64) f64 {
+	if arr.len == 0 {
+		return f64(0)
+	}
+	return max(arr) - min(arr)
+}
\ No newline at end of file
diff --git a/vlib/math/stats_test.v b/vlib/math/stats_test.v
new file mode 100644
index 0000000000..2f8007af72
--- /dev/null
+++ b/vlib/math/stats_test.v
@@ -0,0 +1,259 @@
+import math.stats as stats
+
+fn test_freq() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(10.0),f64(5.9),f64(2.7)]
+	mut o := stats.freq(data,10.0)
+	assert o == 2
+	o = stats.freq(data,2.7)
+	assert o == 1
+	o = stats.freq(data,15)
+	assert o == 0
+}
+
+fn test_mean() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('5.762500')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('17.650000')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('37.708000')
+}
+
+fn test_geometric_mean() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.geometric_mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('5.159932')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.geometric_mean(data)
+	println(o)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('nan') || o.str().eq('-nan') || o == f64(0) // Because in math it yields a complex number
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.geometric_mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('25.064496')
+}
+
+fn test_harmonic_mean() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.harmonic_mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('4.626519')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.harmonic_mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('9.134577')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.harmonic_mean(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('16.555477')
+}
+
+fn test_median() {
+	// Tests were also verified on Wolfram Alpha
+	// Assumes sorted array
+
+	// Even
+	mut data := [f64(2.7),f64(4.45),f64(5.9),f64(10.0)]
+	mut o := stats.median(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('5.175000')
+	data = [f64(-3.0),f64(1.89),f64(4.4),f64(67.31)]
+	o = stats.median(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('3.145000')
+	data = [f64(7.88),f64(12.0),f64(54.83),f64(76.122)]
+	o = stats.median(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('33.415000')
+
+	// Odd
+	data = [f64(2.7),f64(4.45),f64(5.9),f64(10.0),f64(22)]
+	o = stats.median(data)
+	assert o == f64(5.9)
+	data = [f64(-3.0),f64(1.89),f64(4.4),f64(9),f64(67.31)]
+	o = stats.median(data)
+	assert o == f64(4.4)
+	data = [f64(3.3),f64(7.88),f64(12.0),f64(54.83),f64(76.122)]
+	o = stats.median(data)
+	assert o == f64(12.0)
+}
+
+fn test_mode() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(2.7),f64(2.7),f64(4.45),f64(5.9),f64(10.0)]
+	mut o := stats.mode(data)
+	assert o == f64(2.7)
+	data = [f64(-3.0),f64(1.89),f64(1.89),f64(1.89),f64(9),f64(4.4),f64(4.4),f64(9),f64(67.31)]
+	o = stats.mode(data)
+	assert o == f64(1.89)
+	// Testing greedy nature
+	data = [f64(2.0),f64(4.0),f64(2.0),f64(4.0)]
+	o = stats.mode(data)
+	assert o == f64(2.0)
+}
+
+fn test_rms() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.rms(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('6.362046')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.rms(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('33.773393')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.rms(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('47.452561')
+}
+
+fn test_population_variance() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.population_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('7.269219')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.population_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('829.119550')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.population_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('829.852282')
+}
+
+fn test_sample_variance() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.sample_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('9.692292')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.sample_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('1105.492733')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.sample_variance(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('1106.469709')
+}
+
+fn test_population_stddev() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.population_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('2.696149')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.population_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('28.794436')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.population_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('28.807157')
+}
+
+fn test_sample_stddev() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.sample_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('3.113245')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.sample_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('33.248951')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.sample_stddev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('33.263639')
+}
+
+fn test_mean_absdev() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.mean_absdev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('2.187500')
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.mean_absdev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('24.830000')
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.mean_absdev(data)
+	// Some issue with precision comparison in f64 using == operator hence serializing to string
+	assert o.str().eq('27.768000')
+}
+
+fn test_min() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.min(data)
+	assert o == f64(2.7)
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.min(data)
+	assert o == f64(-3.0)
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.min(data)
+	assert o == f64(7.88)
+}
+
+fn test_max() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.max(data)
+	assert o == f64(10.0)
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.max(data)
+	assert o == f64(67.31)
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.max(data)
+	assert o == f64(76.122)
+}
+
+fn test_range() {
+	// Tests were also verified on Wolfram Alpha
+	mut data := [f64(10.0),f64(4.45),f64(5.9),f64(2.7)]
+	mut o := stats.range(data)
+	assert o == f64(7.3)
+	data = [f64(-3.0),f64(67.31),f64(4.4),f64(1.89)]
+	o = stats.range(data)
+	assert o == f64(70.31)
+	data = [f64(12.0),f64(7.88),f64(76.122),f64(54.83)]
+	o = stats.range(data)
+	assert o == f64(68.242)
+}
+
+fn test_passing_empty() {
+	data := []f64
+	assert stats.freq(data,0) == 0
+	assert stats.mean(data) == f64(0)
+	assert stats.geometric_mean(data) == f64(0)
+	assert stats.harmonic_mean(data) == f64(0)
+	assert stats.median(data) == f64(0)
+	assert stats.mode(data) == f64(0)
+	assert stats.rms(data) == f64(0)
+	assert stats.population_variance(data) == f64(0)
+	assert stats.sample_variance(data) == f64(0)
+	assert stats.population_stddev(data) == f64(0)
+	assert stats.sample_stddev(data) == f64(0)
+	assert stats.mean_absdev(data) == f64(0)
+	assert stats.min(data) == f64(0)
+	assert stats.max(data) == f64(0)
+	assert stats.range(data) == f64(0)
+}
\ No newline at end of file