math.stats: update math.stats using generics (#11482)
							parent
							
								
									30029eaf5d
								
							
						
					
					
						commit
						480fe8041a
					
				|  | @ -2,40 +2,16 @@ module stats | ||||||
| 
 | 
 | ||||||
| import math | import math | ||||||
| 
 | 
 | ||||||
| // TODO: Implement all of them with generics
 |  | ||||||
| 
 |  | ||||||
| // This module defines the following statistical operations on f64 array
 |  | ||||||
| //  ---------------------------
 |  | ||||||
| // |   Summary of Functions    |
 |  | ||||||
| //  ---------------------------
 |  | ||||||
| // -----------------------------------------------------------------------
 |  | ||||||
| // freq - Frequency
 |  | ||||||
| // mean - Mean
 |  | ||||||
| // geometric_mean - Geometric Mean
 |  | ||||||
| // harmonic_mean - Harmonic Mean
 |  | ||||||
| // median - Median
 |  | ||||||
| // mode - Mode
 |  | ||||||
| // rms - Root Mean Square
 |  | ||||||
| // population_variance - Population Variance
 |  | ||||||
| // sample_variance - Sample Variance
 |  | ||||||
| // population_stddev - Population Standard Deviation
 |  | ||||||
| // sample_stddev - Sample Standard Deviation
 |  | ||||||
| // mean_absdev - Mean Absolute Deviation
 |  | ||||||
| // min - Minimum of the Array
 |  | ||||||
| // max - Maximum of the Array
 |  | ||||||
| // range - Range of the Array ( max - min )
 |  | ||||||
| // -----------------------------------------------------------------------
 |  | ||||||
| 
 |  | ||||||
| // Measure of Occurance
 | // Measure of Occurance
 | ||||||
| // Frequency of a given number
 | // Frequency of a given number
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/frequency-distribution.html
 | // https://www.mathsisfun.com/data/frequency-distribution.html
 | ||||||
| pub fn freq(arr []f64, val f64) int { | pub fn freq<T>(data []T, val T) int { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return 0 | 		return 0 | ||||||
| 	} | 	} | ||||||
| 	mut count := 0 | 	mut count := 0 | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		if v == val { | 		if v == val { | ||||||
| 			count++ | 			count++ | ||||||
| 		} | 		} | ||||||
|  | @ -47,60 +23,60 @@ pub fn freq(arr []f64, val f64) int { | ||||||
| // Mean of the given input array
 | // Mean of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/central-measures.html
 | // https://www.mathsisfun.com/data/central-measures.html
 | ||||||
| pub fn mean(arr []f64) f64 { | pub fn mean<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut sum := f64(0) | 	mut sum := T(0) | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		sum += v | 		sum += v | ||||||
| 	} | 	} | ||||||
| 	return sum / f64(arr.len) | 	return sum / T(data.len) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Central Tendancy
 | // Measure of Central Tendancy
 | ||||||
| // Geometric Mean of the given input array
 | // Geometric Mean of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/numbers/geometric-mean.html
 | // https://www.mathsisfun.com/numbers/geometric-mean.html
 | ||||||
| pub fn geometric_mean(arr []f64) f64 { | pub fn geometric_mean<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut sum := f64(1) | 	mut sum := 1.0 | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		sum *= v | 		sum *= v | ||||||
| 	} | 	} | ||||||
| 	return math.pow(sum, f64(1) / arr.len) | 	return math.pow(sum, 1.0 / T(data.len)) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Central Tendancy
 | // Measure of Central Tendancy
 | ||||||
| // Harmonic Mean of the given input array
 | // Harmonic Mean of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/numbers/harmonic-mean.html
 | // https://www.mathsisfun.com/numbers/harmonic-mean.html
 | ||||||
| pub fn harmonic_mean(arr []f64) f64 { | pub fn harmonic_mean<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut sum := f64(0) | 	mut sum := T(0) | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		sum += f64(1) / v | 		sum += 1.0 / v | ||||||
| 	} | 	} | ||||||
| 	return f64(arr.len) / sum | 	return T(data.len) / sum | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Central Tendancy
 | // Measure of Central Tendancy
 | ||||||
| // Median of the given input array ( input array is assumed to be sorted )
 | // Median of the given input array ( input array is assumed to be sorted )
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/central-measures.html
 | // https://www.mathsisfun.com/data/central-measures.html
 | ||||||
| pub fn median(arr []f64) f64 { | pub fn median<T>(sorted_data []T) T { | ||||||
| 	if arr.len == 0 { | 	if sorted_data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	if arr.len % 2 == 0 { | 	if sorted_data.len % 2 == 0 { | ||||||
| 		mid := (arr.len / 2) - 1 | 		mid := (sorted_data.len / 2) - 1 | ||||||
| 		return (arr[mid] + arr[mid + 1]) / f64(2) | 		return (sorted_data[mid] + sorted_data[mid + 1]) / T(2) | ||||||
| 	} else { | 	} else { | ||||||
| 		return arr[((arr.len - 1) / 2)] | 		return sorted_data[((sorted_data.len - 1) / 2)] | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -108,114 +84,198 @@ pub fn median(arr []f64) f64 { | ||||||
| // Mode of the given input array
 | // Mode of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/central-measures.html
 | // https://www.mathsisfun.com/data/central-measures.html
 | ||||||
| pub fn mode(arr []f64) f64 { | pub fn mode<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut freqs := []int{} | 	mut freqs := []int{} | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		freqs << freq(arr, v) | 		freqs << freq(data, v) | ||||||
| 	} | 	} | ||||||
| 	mut max := 0 | 	mut max := 0 | ||||||
| 	for i in 0 .. freqs.len { | 	for i := 0; i < freqs.len; i++ { | ||||||
| 		if freqs[i] > freqs[max] { | 		if freqs[i] > freqs[max] { | ||||||
| 			max = i | 			max = i | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return arr[max] | 	return data[max] | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Root Mean Square of the given input array
 | // Root Mean Square of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://en.wikipedia.org/wiki/Root_mean_square
 | // https://en.wikipedia.org/wiki/Root_mean_square
 | ||||||
| pub fn rms(arr []f64) f64 { | pub fn rms<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut sum := f64(0) | 	mut sum := T(0) | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		sum += math.pow(v, 2) | 		sum += math.pow(v, 2) | ||||||
| 	} | 	} | ||||||
| 	return math.sqrt(sum / f64(arr.len)) | 	return math.sqrt(sum / T(data.len)) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Population Variance of the given input array
 | // Population Variance of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/standard-deviation.html
 | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
| pub fn population_variance(arr []f64) f64 { | [inline] | ||||||
| 	if arr.len == 0 { | pub fn population_variance<T>(data []T) T { | ||||||
| 		return f64(0) | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	m := mean(arr) | 	data_mean := mean<T>(data) | ||||||
| 	mut sum := f64(0) | 	return population_variance_mean<T>(data, data_mean) | ||||||
| 	for v in arr { | } | ||||||
| 		sum += math.pow(v - m, 2) | 
 | ||||||
|  | // Measure of Dispersion / Spread
 | ||||||
|  | // Population Variance of the given input array
 | ||||||
|  | // Based on
 | ||||||
|  | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
|  | pub fn population_variance_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	return sum / f64(arr.len) | 	mut sum := T(0) | ||||||
|  | 	for v in data { | ||||||
|  | 		sum += (v - mean) * (v - mean) | ||||||
|  | 	} | ||||||
|  | 	return sum / T(data.len) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Sample Variance of the given input array
 | // Sample Variance of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/standard-deviation.html
 | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
| pub fn sample_variance(arr []f64) f64 { | [inline] | ||||||
| 	if arr.len == 0 { | pub fn sample_variance<T>(data []T) T { | ||||||
| 		return f64(0) | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	m := mean(arr) | 	data_mean := mean<T>(data) | ||||||
| 	mut sum := f64(0) | 	return sample_variance_mean<T>(data, data_mean) | ||||||
| 	for v in arr { | } | ||||||
| 		sum += math.pow(v - m, 2) | 
 | ||||||
|  | // Measure of Dispersion / Spread
 | ||||||
|  | // Sample Variance of the given input array
 | ||||||
|  | // Based on
 | ||||||
|  | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
|  | pub fn sample_variance_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	return sum / f64(arr.len - 1) | 	mut sum := T(0) | ||||||
|  | 	for v in data { | ||||||
|  | 		sum += (v - mean) * (v - mean) | ||||||
|  | 	} | ||||||
|  | 	return sum / T(data.len - 1) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Population Standard Deviation of the given input array
 | // Population Standard Deviation of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/standard-deviation.html
 | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
| pub fn population_stddev(arr []f64) f64 { | [inline] | ||||||
| 	if arr.len == 0 { | pub fn population_stddev<T>(data []T) T { | ||||||
| 		return f64(0) | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	return math.sqrt(population_variance(arr)) | 	return math.sqrt(population_variance<T>(data)) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Measure of Dispersion / Spread
 | ||||||
|  | // Population Standard Deviation of the given input array
 | ||||||
|  | // Based on
 | ||||||
|  | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
|  | [inline] | ||||||
|  | pub fn population_stddev_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	return T(math.sqrt(f64(population_variance_mean<T>(data, mean)))) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Sample Standard Deviation of the given input array
 | // Sample Standard Deviation of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/standard-deviation.html
 | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
| pub fn sample_stddev(arr []f64) f64 { | [inline] | ||||||
| 	if arr.len == 0 { | pub fn sample_stddev<T>(data []T) T { | ||||||
| 		return f64(0) | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	return math.sqrt(sample_variance(arr)) | 	return T(math.sqrt(f64(sample_variance<T>(data)))) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Measure of Dispersion / Spread
 | ||||||
|  | // Sample Standard Deviation of the given input array
 | ||||||
|  | // Based on
 | ||||||
|  | // https://www.mathsisfun.com/data/standard-deviation.html
 | ||||||
|  | [inline] | ||||||
|  | pub fn sample_stddev_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	return T(math.sqrt(f64(sample_variance_mean<T>(data, mean)))) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Mean Absolute Deviation of the given input array
 | // Mean Absolute Deviation of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://en.wikipedia.org/wiki/Average_absolute_deviation
 | // https://en.wikipedia.org/wiki/Average_absolute_deviation
 | ||||||
| pub fn mean_absdev(arr []f64) f64 { | [inline] | ||||||
| 	if arr.len == 0 { | pub fn absdev<T>(data []T) T { | ||||||
| 		return f64(0) | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	amean := mean(arr) | 	data_mean := mean<T>(data) | ||||||
| 	mut sum := f64(0) | 	return absdev_mean<T>(data, data_mean) | ||||||
| 	for v in arr { | } | ||||||
| 		sum += math.abs(v - amean) | 
 | ||||||
|  | // Measure of Dispersion / Spread
 | ||||||
|  | // Mean Absolute Deviation of the given input array
 | ||||||
|  | // Based on
 | ||||||
|  | // https://en.wikipedia.org/wiki/Average_absolute_deviation
 | ||||||
|  | pub fn absdev_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	return sum / f64(arr.len) | 	mut sum := T(0) | ||||||
|  | 	for v in data { | ||||||
|  | 		sum += math.abs(v - mean) | ||||||
|  | 	} | ||||||
|  | 	return sum / T(data.len) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Sum of squares
 | ||||||
|  | [inline] | ||||||
|  | pub fn tss<T>(data []T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	data_mean := mean<T>(data) | ||||||
|  | 	return tss_mean<T>(data, data_mean) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Sum of squares about the mean
 | ||||||
|  | pub fn tss_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	mut tss := T(0) | ||||||
|  | 	for v in data { | ||||||
|  | 		tss += (v - mean) * (v - mean) | ||||||
|  | 	} | ||||||
|  | 	return tss | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Minimum of the given input array
 | // Minimum of the given input array
 | ||||||
| pub fn min(arr []f64) f64 { | pub fn min<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut min := arr[0] | 	mut min := data[0] | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		if v < min { | 		if v < min { | ||||||
| 			min = v | 			min = v | ||||||
| 		} | 		} | ||||||
|  | @ -224,12 +284,12 @@ pub fn min(arr []f64) f64 { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Maximum of the given input array
 | // Maximum of the given input array
 | ||||||
| pub fn max(arr []f64) f64 { | pub fn max<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
| 	} | 	} | ||||||
| 	mut max := arr[0] | 	mut max := data[0] | ||||||
| 	for v in arr { | 	for v in data { | ||||||
| 		if v > max { | 		if v > max { | ||||||
| 			max = v | 			max = v | ||||||
| 		} | 		} | ||||||
|  | @ -237,13 +297,188 @@ pub fn max(arr []f64) f64 { | ||||||
| 	return max | 	return max | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // Minimum and maximum of the given input array
 | ||||||
|  | pub fn minmax<T>(data []T) (T, T) { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0), T(0) | ||||||
|  | 	} | ||||||
|  | 	mut max := data[0] | ||||||
|  | 	mut min := data[0] | ||||||
|  | 	for v in data[1..] { | ||||||
|  | 		if v > max { | ||||||
|  | 			max = v | ||||||
|  | 		} | ||||||
|  | 		if v < min { | ||||||
|  | 			min = v | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return min, max | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Minimum of the given input array
 | ||||||
|  | pub fn min_index<T>(data []T) int { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return 0 | ||||||
|  | 	} | ||||||
|  | 	mut min := data[0] | ||||||
|  | 	mut min_index := 0 | ||||||
|  | 	for i, v in data { | ||||||
|  | 		if v < min { | ||||||
|  | 			min = v | ||||||
|  | 			min_index = i | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return min_index | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Maximum of the given input array
 | ||||||
|  | pub fn max_index<T>(data []T) int { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return 0 | ||||||
|  | 	} | ||||||
|  | 	mut max := data[0] | ||||||
|  | 	mut max_index := 0 | ||||||
|  | 	for i, v in data { | ||||||
|  | 		if v > max { | ||||||
|  | 			max = v | ||||||
|  | 			max_index = i | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return max_index | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Minimum and maximum of the given input array
 | ||||||
|  | pub fn minmax_index<T>(data []T) (int, int) { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return 0, 0 | ||||||
|  | 	} | ||||||
|  | 	mut min := data[0] | ||||||
|  | 	mut max := data[0] | ||||||
|  | 	mut min_index := 0 | ||||||
|  | 	mut max_index := 0 | ||||||
|  | 	for i, v in data { | ||||||
|  | 		if v < min { | ||||||
|  | 			min = v | ||||||
|  | 			min_index = i | ||||||
|  | 		} | ||||||
|  | 		if v > max { | ||||||
|  | 			max = v | ||||||
|  | 			max_index = i | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return min_index, max_index | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // Measure of Dispersion / Spread
 | // Measure of Dispersion / Spread
 | ||||||
| // Range ( Maximum - Minimum ) of the given input array
 | // Range ( Maximum - Minimum ) of the given input array
 | ||||||
| // Based on
 | // Based on
 | ||||||
| // https://www.mathsisfun.com/data/range.html
 | // https://www.mathsisfun.com/data/range.html
 | ||||||
| pub fn range(arr []f64) f64 { | pub fn range<T>(data []T) T { | ||||||
| 	if arr.len == 0 { | 	if data.len == 0 { | ||||||
| 		return f64(0) | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	min, max := minmax<T>(data) | ||||||
|  | 	return max - min | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | [inline] | ||||||
|  | pub fn covariance<T>(data1 []T, data2 []T) T { | ||||||
|  | 	mean1 := mean<T>(data1) | ||||||
|  | 	mean2 := mean<T>(data2) | ||||||
|  | 	return covariance_mean<T>(data1, data2, mean1, mean2) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Compute the covariance of a dataset using
 | ||||||
|  | // the recurrence relation
 | ||||||
|  | pub fn covariance_mean<T>(data1 []T, data2 []T, mean1 T, mean2 T) T { | ||||||
|  | 	n := int(math.min(data1.len, data2.len)) | ||||||
|  | 	if n == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	mut covariance := T(0) | ||||||
|  | 	for i in 0 .. n { | ||||||
|  | 		delta1 := data1[i] - mean1 | ||||||
|  | 		delta2 := data2[i] - mean2 | ||||||
|  | 		covariance += (delta1 * delta2 - covariance) / (T(i) + 1.0) | ||||||
|  | 	} | ||||||
|  | 	return covariance | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | [inline] | ||||||
|  | pub fn lag1_autocorrelation<T>(data []T) T { | ||||||
|  | 	data_mean := mean<T>(data) | ||||||
|  | 	return lag1_autocorrelation_mean<T>(data, data_mean) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Compute the lag-1 autocorrelation of a dataset using
 | ||||||
|  | // the recurrence relation
 | ||||||
|  | pub fn lag1_autocorrelation_mean<T>(data []T, mean T) T { | ||||||
|  | 	if data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	mut q := T(0) | ||||||
|  | 	mut v := (data[0] * mean) - (data[0] * mean) | ||||||
|  | 	for i := 1; i < data.len; i++ { | ||||||
|  | 		delta0 := data[i - 1] - mean | ||||||
|  | 		delta1 := data[i] - mean | ||||||
|  | 		q += (delta0 * delta1 - q) / (T(i) + 1.0) | ||||||
|  | 		v += (delta1 * delta1 - v) / (T(i) + 1.0) | ||||||
|  | 	} | ||||||
|  | 	return q / v | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | [inline] | ||||||
|  | pub fn kurtosis<T>(data []T) T { | ||||||
|  | 	data_mean := mean<T>(data) | ||||||
|  | 	sd := population_stddev_mean<T>(data, data_mean) | ||||||
|  | 	return kurtosis_mean_stddev<T>(data, data_mean, sd) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Takes a dataset and finds the kurtosis
 | ||||||
|  | // using the fourth moment the deviations, normalized by the sd
 | ||||||
|  | pub fn kurtosis_mean_stddev<T>(data []T, mean T, sd T) T { | ||||||
|  | 	mut avg := T(0) // find the fourth moment the deviations, normalized by the sd
 | ||||||
|  | 	/* | ||||||
|  | 	we use a recurrence relation to stably update a running value so | ||||||
|  |          * there aren't any large sums that can overflow | ||||||
|  | 	*/ | ||||||
|  | 	for i, v in data { | ||||||
|  | 		x := (v - mean) / sd | ||||||
|  | 		avg += (x * x * x * x - avg) / (T(i) + 1.0) | ||||||
|  | 	} | ||||||
|  | 	return avg - T(3.0) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | [inline] | ||||||
|  | pub fn skew<T>(data []T) T { | ||||||
|  | 	data_mean := mean<T>(data) | ||||||
|  | 	sd := population_stddev_mean<T>(data, data_mean) | ||||||
|  | 	return skew_mean_stddev<T>(data, data_mean, sd) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | pub fn skew_mean_stddev<T>(data []T, mean T, sd T) T { | ||||||
|  | 	mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd.
 | ||||||
|  | 	/* | ||||||
|  | 	we use a recurrence relation to stably update a running value so | ||||||
|  |          * there aren't any large sums that can overflow | ||||||
|  | 	*/ | ||||||
|  | 	for i, v in data { | ||||||
|  | 		x := (v - mean) / sd | ||||||
|  | 		skew += (x * x * x - skew) / (T(i) + 1.0) | ||||||
|  | 	} | ||||||
|  | 	return skew | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | pub fn quantile<T>(sorted_data []T, f T) T { | ||||||
|  | 	if sorted_data.len == 0 { | ||||||
|  | 		return T(0) | ||||||
|  | 	} | ||||||
|  | 	index := f * (T(sorted_data.len) - 1.0) | ||||||
|  | 	lhs := int(index) | ||||||
|  | 	delta := index - T(lhs) | ||||||
|  | 	return if lhs == sorted_data.len - 1 { | ||||||
|  | 		sorted_data[lhs] | ||||||
|  | 	} else { | ||||||
|  | 		(1.0 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)] | ||||||
| 	} | 	} | ||||||
| 	return max(arr) - min(arr) |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| import math.stats |  | ||||||
| import math | import math | ||||||
|  | import math.stats | ||||||
| 
 | 
 | ||||||
| fn test_freq() { | fn test_freq() { | ||||||
| 	// Tests were also verified on Wolfram Alpha
 | 	// Tests were also verified on Wolfram Alpha
 | ||||||
|  | @ -44,8 +44,9 @@ fn test_geometric_mean() { | ||||||
| 	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] | 	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] | ||||||
| 	o = stats.geometric_mean(data) | 	o = stats.geometric_mean(data) | ||||||
| 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | ||||||
| 	assert o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0) | 	ok := o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0) | ||||||
| 		|| o.str() == '-nan(ind)' // Because in math it yields a complex number
 | 		|| o.str() == '-nan(ind)' | ||||||
|  | 	assert ok // Because in math it yields a complex number
 | ||||||
| 	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] | 	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] | ||||||
| 	o = stats.geometric_mean(data) | 	o = stats.geometric_mean(data) | ||||||
| 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | ||||||
|  | @ -194,18 +195,18 @@ fn test_sample_stddev() { | ||||||
| 	assert tst_res(o.str(), '33.263639') | 	assert tst_res(o.str(), '33.263639') | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| fn test_mean_absdev() { | fn test_absdev() { | ||||||
| 	// Tests were also verified on Wolfram Alpha
 | 	// Tests were also verified on Wolfram Alpha
 | ||||||
| 	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] | 	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] | ||||||
| 	mut o := stats.mean_absdev(data) | 	mut o := stats.absdev(data) | ||||||
| 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | ||||||
| 	assert tst_res(o.str(), '2.187500') | 	assert tst_res(o.str(), '2.187500') | ||||||
| 	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] | 	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] | ||||||
| 	o = stats.mean_absdev(data) | 	o = stats.absdev(data) | ||||||
| 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | ||||||
| 	assert tst_res(o.str(), '24.830000') | 	assert tst_res(o.str(), '24.830000') | ||||||
| 	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] | 	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] | ||||||
| 	o = stats.mean_absdev(data) | 	o = stats.absdev(data) | ||||||
| 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | 	// Some issue with precision comparison in f64 using == operator hence serializing to string
 | ||||||
| 	assert tst_res(o.str(), '27.768000') | 	assert tst_res(o.str(), '27.768000') | ||||||
| } | } | ||||||
|  | @ -262,7 +263,7 @@ fn test_passing_empty() { | ||||||
| 	assert stats.sample_variance(data) == f64(0) | 	assert stats.sample_variance(data) == f64(0) | ||||||
| 	assert stats.population_stddev(data) == f64(0) | 	assert stats.population_stddev(data) == f64(0) | ||||||
| 	assert stats.sample_stddev(data) == f64(0) | 	assert stats.sample_stddev(data) == f64(0) | ||||||
| 	assert stats.mean_absdev(data) == f64(0) | 	assert stats.absdev(data) == f64(0) | ||||||
| 	assert stats.min(data) == f64(0) | 	assert stats.min(data) == f64(0) | ||||||
| 	assert stats.max(data) == f64(0) | 	assert stats.max(data) == f64(0) | ||||||
| 	assert stats.range(data) == f64(0) | 	assert stats.range(data) == f64(0) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue