module stats import math // freq calculates the Measure of Occurance // Frequency of a given number // Based on // https://www.mathsisfun.com/data/frequency-distribution.html pub fn freq(data []T, val T) int { if data.len == 0 { return 0 } mut count := 0 for v in data { if v == val { count++ } } return count } // mean calculates the average // of the given input array, sum(data)/data.len // Based on // https://www.mathsisfun.com/data/central-measures.html pub fn mean(data []T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += v } return sum / T(data.len) } // geometric_mean calculates the central tendency // of the given input array, product(data)**1/data.len // Based on // https://www.mathsisfun.com/numbers/geometric-mean.html pub fn geometric_mean(data []T) T { if data.len == 0 { return T(0) } mut sum := 1.0 for v in data { sum *= v } return math.pow(sum, 1.0 / T(data.len)) } // harmonic_mean calculates the reciprocal of the average of reciprocals // of the given input array // Based on // https://www.mathsisfun.com/numbers/harmonic-mean.html pub fn harmonic_mean(data []T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += 1.0 / v } return T(data.len) / sum } // median returns the middlemost value of the given input array ( input array is assumed to be sorted ) // Based on // https://www.mathsisfun.com/data/central-measures.html pub fn median(sorted_data []T) T { if sorted_data.len == 0 { return T(0) } if sorted_data.len % 2 == 0 { mid := (sorted_data.len / 2) - 1 return (sorted_data[mid] + sorted_data[mid + 1]) / T(2) } else { return sorted_data[((sorted_data.len - 1) / 2)] } } // mode calculates the highest occuring value of the given input array // Based on // https://www.mathsisfun.com/data/central-measures.html pub fn mode(data []T) T { if data.len == 0 { return T(0) } mut freqs := []int{} for v in data { freqs << freq(data, v) } mut max := 0 for i := 0; i < freqs.len; i++ { if freqs[i] > freqs[max] { max = i } } return data[max] } // rms, Root Mean Square, calculates the sqrt of the mean of the squares of the given input array // Based on // https://en.wikipedia.org/wiki/Root_mean_square pub fn rms(data []T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += math.pow(v, 2) } return math.sqrt(sum / T(data.len)) } // population_variance is the Measure of Dispersion / Spread // of the given input array // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn population_variance(data []T) T { if data.len == 0 { return T(0) } data_mean := mean(data) return population_variance_mean(data, data_mean) } // population_variance_mean is the Measure of Dispersion / Spread // of the given input array, with the provided mean // Based on // https://www.mathsisfun.com/data/standard-deviation.html pub fn population_variance_mean(data []T, mean T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += (v - mean) * (v - mean) } return sum / T(data.len) } // sample_variance calculates the spread of dataset around the mean // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn sample_variance(data []T) T { if data.len == 0 { return T(0) } data_mean := mean(data) return sample_variance_mean(data, data_mean) } // sample_variance calculates the spread of dataset around the provided mean // Based on // https://www.mathsisfun.com/data/standard-deviation.html pub fn sample_variance_mean(data []T, mean T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += (v - mean) * (v - mean) } return sum / T(data.len - 1) } // population_stddev calculates how spread out the dataset is // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn population_stddev(data []T) T { if data.len == 0 { return T(0) } return math.sqrt(population_variance(data)) } // population_stddev_mean calculates how spread out the dataset is, with the provide mean // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn population_stddev_mean(data []T, mean T) T { if data.len == 0 { return T(0) } return T(math.sqrt(f64(population_variance_mean(data, mean)))) } // Measure of Dispersion / Spread // Sample Standard Deviation of the given input array // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn sample_stddev(data []T) T { if data.len == 0 { return T(0) } return T(math.sqrt(f64(sample_variance(data)))) } // Measure of Dispersion / Spread // Sample Standard Deviation of the given input array // Based on // https://www.mathsisfun.com/data/standard-deviation.html [inline] pub fn sample_stddev_mean(data []T, mean T) T { if data.len == 0 { return T(0) } return T(math.sqrt(f64(sample_variance_mean(data, mean)))) } // absdev calculates the average distance between each data point and the mean // Based on // https://en.wikipedia.org/wiki/Average_absolute_deviation [inline] pub fn absdev(data []T) T { if data.len == 0 { return T(0) } data_mean := mean(data) return absdev_mean(data, data_mean) } // absdev_mean calculates the average distance between each data point and the provided mean // Based on // https://en.wikipedia.org/wiki/Average_absolute_deviation pub fn absdev_mean(data []T, mean T) T { if data.len == 0 { return T(0) } mut sum := T(0) for v in data { sum += math.abs(v - mean) } return sum / T(data.len) } // tts, Sum of squares, calculates the sum over all squared differences between values and overall mean [inline] pub fn tss(data []T) T { if data.len == 0 { return T(0) } data_mean := mean(data) return tss_mean(data, data_mean) } // tts_mean, Sum of squares, calculates the sum over all squared differences between values and the provided mean pub fn tss_mean(data []T, mean T) T { if data.len == 0 { return T(0) } mut tss := T(0) for v in data { tss += (v - mean) * (v - mean) } return tss } // min finds the minimum value from the dataset pub fn min(data []T) T { if data.len == 0 { return T(0) } mut min := data[0] for v in data { if v < min { min = v } } return min } // max finds the maximum value from the dataset pub fn max(data []T) T { if data.len == 0 { return T(0) } mut max := data[0] for v in data { if v > max { max = v } } return max } // minmax finds the minimum and maximum value from the dataset pub fn minmax(data []T) (T, T) { if data.len == 0 { return T(0), T(0) } mut max := data[0] mut min := data[0] for v in data[1..] { if v > max { max = v } if v < min { min = v } } return min, max } // min_index finds the first index of the minimum value pub fn min_index(data []T) int { if data.len == 0 { return 0 } mut min := data[0] mut min_index := 0 for i, v in data { if v < min { min = v min_index = i } } return min_index } // max_index finds the first index of the maximum value pub fn max_index(data []T) int { if data.len == 0 { return 0 } mut max := data[0] mut max_index := 0 for i, v in data { if v > max { max = v max_index = i } } return max_index } // minmax_index finds the first index of the minimum and maximum value pub fn minmax_index(data []T) (int, int) { if data.len == 0 { return 0, 0 } mut min := data[0] mut max := data[0] mut min_index := 0 mut max_index := 0 for i, v in data { if v < min { min = v min_index = i } if v > max { max = v max_index = i } } return min_index, max_index } // range calculates the difference between the min and max // Range ( Maximum - Minimum ) of the given input array // Based on // https://www.mathsisfun.com/data/range.html pub fn range(data []T) T { if data.len == 0 { return T(0) } min, max := minmax(data) return max - min } // covariance calculates directional association between datasets // positive value denotes variables move in same direction and negative denotes variables move in opposite directions [inline] pub fn covariance(data1 []T, data2 []T) T { mean1 := mean(data1) mean2 := mean(data2) return covariance_mean(data1, data2, mean1, mean2) } // covariance_mean computes the covariance of a dataset with means provided // the recurrence relation pub fn covariance_mean(data1 []T, data2 []T, mean1 T, mean2 T) T { n := int(math.min(data1.len, data2.len)) if n == 0 { return T(0) } mut covariance := T(0) for i in 0 .. n { delta1 := data1[i] - mean1 delta2 := data2[i] - mean2 covariance += (delta1 * delta2 - covariance) / (T(i) + 1.0) } return covariance } // lag1_autocorrelation_mean calculates the correlation between values that are one time period apart // of a dataset, based on the mean [inline] pub fn lag1_autocorrelation(data []T) T { data_mean := mean(data) return lag1_autocorrelation_mean(data, data_mean) } // lag1_autocorrelation_mean calculates the correlation between values that are one time period apart // of a dataset, using // the recurrence relation pub fn lag1_autocorrelation_mean(data []T, mean T) T { if data.len == 0 { return T(0) } mut q := T(0) mut v := (data[0] * mean) - (data[0] * mean) for i := 1; i < data.len; i++ { delta0 := data[i - 1] - mean delta1 := data[i] - mean q += (delta0 * delta1 - q) / (T(i) + 1.0) v += (delta1 * delta1 - v) / (T(i) + 1.0) } return q / v } // kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard of deviation [inline] pub fn kurtosis(data []T) T { data_mean := mean(data) sd := population_stddev_mean(data, data_mean) return kurtosis_mean_stddev(data, data_mean, sd) } // kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data // using the fourth moment the deviations, normalized by the sd pub fn kurtosis_mean_stddev(data []T, mean T, sd T) T { mut avg := T(0) // find the fourth moment the deviations, normalized by the sd /* we use a recurrence relation to stably update a running value so * there aren't any large sums that can overflow */ for i, v in data { x := (v - mean) / sd avg += (x * x * x * x - avg) / (T(i) + 1.0) } return avg - T(3.0) } // skew calculates the mean and standard of deviation to find the skew from the data [inline] pub fn skew(data []T) T { data_mean := mean(data) sd := population_stddev_mean(data, data_mean) return skew_mean_stddev(data, data_mean, sd) } // skew_mean_stddev calculates the skewness of data pub fn skew_mean_stddev(data []T, mean T, sd T) T { mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd. /* we use a recurrence relation to stably update a running value so * there aren't any large sums that can overflow */ for i, v in data { x := (v - mean) / sd skew += (x * x * x - skew) / (T(i) + 1.0) } return skew } pub fn quantile(sorted_data []T, f T) T { if sorted_data.len == 0 { return T(0) } index := f * (T(sorted_data.len) - 1.0) lhs := int(index) delta := index - T(lhs) return if lhs == sorted_data.len - 1 { sorted_data[lhs] } else { (1.0 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)] } }