v
Mirror of https://github.com/vlang/v
1module stats
2
3import math
4
// freq counts how many elements of `data` compare equal (`==`) to `val`.
// An empty array yields 0.
// Reference: https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq[T](data []T, val T) int {
	mut occurrences := 0
	// An empty array never enters the loop, so the result stays 0.
	for i := 0; i < data.len; i++ {
		if data[i] == val {
			occurrences++
		}
	}
	return occurrences
}
21
// mean returns the arithmetic average of `data`, i.e. sum(data) / data.len.
// All arithmetic is carried out in type T; an empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/central-measures.html
pub fn mean[T](data []T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut total := T(0)
	for i in 0 .. n {
		total += data[i]
	}
	return total / T(n)
}
36
// geometric_mean returns the n-th root of the product of all elements of `data`,
// i.e. product(data) ** (1 / data.len). An empty array yields T(0).
// The product and the root are computed in f64 and the result is converted
// back to T explicitly, matching the `T(math.sqrt(f64(...)))` convention used
// by the standard-deviation helpers in this module.
// Reference: https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut product := f64(1.0)
	for v in data {
		product *= f64(v)
	}
	// math.pow works on f64; convert explicitly instead of relying on an
	// implicit f64 -> T conversion at the return.
	return T(math.pow(product, 1.0 / f64(data.len)))
}
51
// harmonic_mean returns data.len divided by the sum of the reciprocals of the
// elements of `data`. An empty array yields T(0).
// Reference: https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean[T](data []T) T {
	count := data.len
	if count == 0 {
		return T(0)
	}
	mut reciprocal_total := T(0)
	for i in 0 .. count {
		reciprocal_total += 1.0 / data[i]
	}
	return T(count) / reciprocal_total
}
66
// median returns the middle value of `sorted_data`. The array is not sorted
// here; the caller is expected to pass it already sorted.
// For an even number of elements the two central values are averaged (in T).
// An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/central-measures.html
pub fn median[T](sorted_data []T) T {
	n := sorted_data.len
	if n == 0 {
		return T(0)
	}
	half := n / 2
	if n % 2 != 0 {
		// For odd n, (n - 1) / 2 and n / 2 are the same index.
		return sorted_data[half]
	}
	return (sorted_data[half - 1] + sorted_data[half]) / T(2)
}
81
// mode returns the element of `data` that occurs most often, using `freq` to
// count occurrences. When several values share the highest count, the one
// that appears first in `data` wins. An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/central-measures.html
pub fn mode[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut best_index := 0
	mut best_count := 0
	for i, candidate in data {
		occurrences := freq(data, candidate)
		// Strict comparison keeps the earliest element on ties.
		if occurrences > best_count {
			best_count = occurrences
			best_index = i
		}
	}
	return data[best_index]
}
101
// rms (Root Mean Square) returns the square root of the mean of the squares
// of the elements of `data`. An empty array yields T(0).
// The accumulation and the square root are done in f64 and the result is
// converted back to T explicitly, matching the `T(math.sqrt(f64(...)))`
// convention used by the standard-deviation helpers in this module.
// Reference: https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut sum_of_squares := f64(0)
	for v in data {
		x := f64(v)
		// A plain multiply is enough for squaring; no need for math.pow.
		sum_of_squares += x * x
	}
	return T(math.sqrt(sum_of_squares / f64(data.len)))
}
115
// population_variance returns the population variance of `data`: it computes
// the mean with `mean` and delegates to `population_variance_mean`.
// An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_variance[T](data []T) T {
	if data.len > 0 {
		return population_variance_mean[T](data, mean[T](data))
	}
	return T(0)
}
128
// population_variance_mean returns the population variance of `data` around
// the caller-supplied `mean`: the sum of squared deviations divided by
// data.len. An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance_mean[T](data []T, mean T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut squared_total := T(0)
	for i in 0 .. n {
		deviation := data[i] - mean
		squared_total += deviation * deviation
	}
	return squared_total / T(n)
}
143
// sample_variance returns the sample variance of `data`: it computes the mean
// with `mean` and delegates to `sample_variance_mean`.
// An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_variance[T](data []T) T {
	if data.len > 0 {
		return sample_variance_mean[T](data, mean[T](data))
	}
	return T(0)
}
155
// sample_variance_mean returns the sample variance of `data` around the
// caller-supplied `mean`: the sum of squared deviations divided by
// (data.len - 1).
// Arrays with fewer than two elements yield T(0), because the divisor
// (data.len - 1) would otherwise be zero.
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance_mean[T](data []T, mean T) T {
	// Guard against dividing by zero for a single sample (and keep the
	// existing T(0) result for empty input).
	if data.len < 2 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += (v - mean) * (v - mean)
	}
	return sum / T(data.len - 1)
}
169
// population_stddev returns the population standard deviation of `data`,
// i.e. the square root of `population_variance`.
// An empty array yields T(0).
// The square root is taken in f64 and converted back to T explicitly, in the
// same way as population_stddev_mean, sample_stddev and sample_stddev_mean.
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	return T(math.sqrt(f64(population_variance[T](data))))
}
180
// population_stddev_mean returns the population standard deviation of `data`
// around the provided `mean`, i.e. the square root of
// `population_variance_mean`. An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev_mean[T](data []T, mean T) T {
	if data.len > 0 {
		variance := population_variance_mean[T](data, mean)
		// math.sqrt works on f64, so convert in and back out explicitly.
		return T(math.sqrt(f64(variance)))
	}
	return T(0)
}
191
// sample_stddev returns the sample standard deviation of `data`, i.e. the
// square root of `sample_variance`. An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev[T](data []T) T {
	if data.len > 0 {
		variance := sample_variance[T](data)
		// math.sqrt works on f64, so convert in and back out explicitly.
		return T(math.sqrt(f64(variance)))
	}
	return T(0)
}
203
// sample_stddev_mean returns the sample standard deviation of `data` around
// the provided `mean`, i.e. the square root of `sample_variance_mean`.
// An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev_mean[T](data []T, mean T) T {
	if data.len > 0 {
		variance := sample_variance_mean[T](data, mean)
		// math.sqrt works on f64, so convert in and back out explicitly.
		return T(math.sqrt(f64(variance)))
	}
	return T(0)
}
215
// absdev returns the average absolute deviation of `data` from its mean:
// it computes the mean with `mean` and delegates to `absdev_mean`.
// An empty array yields T(0).
// Reference: https://en.wikipedia.org/wiki/Average_absolute_deviation
@[inline]
pub fn absdev[T](data []T) T {
	if data.len > 0 {
		return absdev_mean[T](data, mean[T](data))
	}
	return T(0)
}
227
// absdev_mean returns the average absolute deviation of `data` from the
// provided `mean`: the sum of |v - mean| divided by data.len.
// An empty array yields T(0).
// Reference: https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn absdev_mean[T](data []T, mean T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut distance_total := T(0)
	for i in 0 .. n {
		distance_total += math.abs(data[i] - mean)
	}
	return distance_total / T(n)
}
241
// tss (total sum of squares) returns the sum of the squared differences
// between each element of `data` and the mean of `data`: it computes the mean
// with `mean` and delegates to `tss_mean`. An empty array yields T(0).
@[inline]
pub fn tss[T](data []T) T {
	if data.len > 0 {
		return tss_mean[T](data, mean[T](data))
	}
	return T(0)
}
251
// tss_mean (total sum of squares) returns the sum of the squared differences
// between each element of `data` and the provided `mean`.
// An empty array yields T(0).
pub fn tss_mean[T](data []T, mean T) T {
	mut squared_total := T(0)
	// An empty array never enters the loop, so the result stays T(0).
	for i := 0; i < data.len; i++ {
		deviation := data[i] - mean
		squared_total += deviation * deviation
	}
	return squared_total
}
263
// min returns the smallest element of `data`, compared with `<`.
// An empty array yields T(0).
pub fn min[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut smallest := data[0]
	// data[0] is the starting candidate, so scanning can begin at index 1.
	for i := 1; i < data.len; i++ {
		if data[i] < smallest {
			smallest = data[i]
		}
	}
	return smallest
}
277
// max returns the largest element of `data`, compared with `>`.
// An empty array yields T(0).
pub fn max[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut largest := data[0]
	// data[0] is the starting candidate, so scanning can begin at index 1.
	for i := 1; i < data.len; i++ {
		if data[i] > largest {
			largest = data[i]
		}
	}
	return largest
}
291
// minmax returns the smallest and the largest element of `data`, in that
// order, found in a single pass. An empty array yields (T(0), T(0)).
pub fn minmax[T](data []T) (T, T) {
	if data.len == 0 {
		return T(0), T(0)
	}
	mut lo := data[0]
	mut hi := data[0]
	for i in 1 .. data.len {
		x := data[i]
		if x < lo {
			lo = x
		}
		if x > hi {
			hi = x
		}
	}
	return lo, hi
}
309
// min_index returns the index of the first occurrence of the smallest element
// of `data`. An empty array yields 0.
pub fn min_index[T](data []T) int {
	mut best := 0
	// Strict `<` keeps the earliest index on ties; an empty array never
	// enters the loop and returns 0.
	for i := 1; i < data.len; i++ {
		if data[i] < data[best] {
			best = i
		}
	}
	return best
}
325
// max_index returns the index of the first occurrence of the largest element
// of `data`. An empty array yields 0.
pub fn max_index[T](data []T) int {
	mut best := 0
	// Strict `>` keeps the earliest index on ties; an empty array never
	// enters the loop and returns 0.
	for i := 1; i < data.len; i++ {
		if data[i] > data[best] {
			best = i
		}
	}
	return best
}
341
// minmax_index returns the index of the first occurrence of the smallest
// element and the index of the first occurrence of the largest element of
// `data`, in that order. An empty array yields (0, 0).
pub fn minmax_index[T](data []T) (int, int) {
	mut lo_idx := 0
	mut hi_idx := 0
	// Strict comparisons keep the earliest index on ties; an empty array
	// never enters the loop and returns (0, 0).
	for i := 1; i < data.len; i++ {
		x := data[i]
		if x < data[lo_idx] {
			lo_idx = i
		}
		if x > data[hi_idx] {
			hi_idx = i
		}
	}
	return lo_idx, hi_idx
}
363
// range returns the difference between the largest and the smallest element
// of `data` (maximum - minimum), both obtained from `minmax`.
// An empty array yields T(0).
// Reference: https://www.mathsisfun.com/data/range.html
pub fn range[T](data []T) T {
	if data.len > 0 {
		lowest, highest := minmax[T](data)
		return highest - lowest
	}
	return T(0)
}
375
// covariance returns the covariance of `data1` and `data2`: it computes the
// mean of each array with `mean` and delegates to `covariance_mean`.
// A positive result means the two datasets tend to move in the same
// direction, a negative result means they tend to move in opposite directions.
@[inline]
pub fn covariance[T](data1 []T, data2 []T) T {
	return covariance_mean[T](data1, data2, mean[T](data1), mean[T](data2))
}
384
// covariance_mean returns the covariance of `data1` and `data2` around the
// provided means `mean1` and `mean2`.
// Only the first min(data1.len, data2.len) pairs are used; if that count is
// zero the result is T(0).
// The value is built with a running-average recurrence rather than one big sum.
pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
	n := if data1.len < data2.len { data1.len } else { data2.len }
	if n == 0 {
		return T(0)
	}
	mut running := T(0)
	for i := 0; i < n; i++ {
		product := (data1[i] - mean1) * (data2[i] - mean2)
		// Running mean update: after this step `running` is the average of
		// the first i + 1 products.
		running += (product - running) / (T(i) + 1.0)
	}
	return running
}
400
// lag1_autocorrelation returns the lag-1 autocorrelation of `data`, i.e. the
// correlation between values that are one position apart: it computes the
// mean with `mean` and delegates to `lag1_autocorrelation_mean`.
@[inline]
pub fn lag1_autocorrelation[T](data []T) T {
	return lag1_autocorrelation_mean[T](data, mean[T](data))
}
408
// lag1_autocorrelation_mean returns the lag-1 autocorrelation of `data`
// around the provided `mean`:
//   sum((data[i-1] - mean) * (data[i] - mean), i = 1..n-1) / sum((data[i] - mean)^2, i = 0..n-1)
// Both sums are maintained as running averages over the same number of steps
// (a recurrence relation), so their common scale cancels in the final ratio.
// An empty array yields T(0). If every element equals `mean`, the
// denominator is zero and the result is whatever T's division by zero yields.
pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut q := T(0)
	// The denominator must start with the squared deviation of the first
	// element; the loop below only adds the terms for i >= 1.
	first_delta := data[0] - mean
	mut v := first_delta * first_delta
	for i := 1; i < data.len; i++ {
		delta0 := data[i - 1] - mean
		delta1 := data[i] - mean
		q += (delta0 * delta1 - q) / (T(i) + 1.0)
		v += (delta1 * delta1 - v) / (T(i) + 1.0)
	}
	return q / v
}
426
// kurtosis returns the measure of 'tailedness' of `data`: it computes the
// mean with `mean`, the population standard deviation with
// `population_stddev_mean`, and delegates to `kurtosis_mean_stddev`.
@[inline]
pub fn kurtosis[T](data []T) T {
	center := mean[T](data)
	spread := population_stddev_mean[T](data, center)
	return kurtosis_mean_stddev[T](data, center, spread)
}
434
// kurtosis_mean_stddev returns the measure of 'tailedness' of `data` for the
// provided `mean` and standard deviation `sd`: the average of
// ((v - mean) / sd)^4 over all elements, minus 3.
// There is no empty-input guard: for an empty array the average stays at
// T(0) and the result is T(0) - 3.
pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
	mut fourth_moment := T(0)
	// A running-average recurrence is used instead of one large sum, so the
	// intermediate value cannot grow with the number of elements.
	for i := 0; i < data.len; i++ {
		z := (data[i] - mean) / sd
		fourth_moment += (z * z * z * z - fourth_moment) / (T(i) + 1.0)
	}
	return fourth_moment - T(3.0)
}
449
// skew returns the skewness of `data`: it computes the mean with `mean`, the
// population standard deviation with `population_stddev_mean`, and delegates
// to `skew_mean_stddev`.
@[inline]
pub fn skew[T](data []T) T {
	center := mean[T](data)
	spread := population_stddev_mean[T](data, center)
	return skew_mean_stddev[T](data, center, spread)
}
457
// skew_mean_stddev returns the skewness of `data` for the provided `mean` and
// standard deviation `sd`: the average of ((v - mean) / sd)^3 over all
// elements. For an empty array the result is T(0).
pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
	mut third_moment := T(0)
	// A running-average recurrence is used instead of one large sum, so the
	// intermediate value cannot grow with the number of elements.
	for i := 0; i < data.len; i++ {
		z := (data[i] - mean) / sd
		third_moment += (z * z * z - third_moment) / (T(i) + 1.0)
	}
	return third_moment
}
471
// quantile returns the value at fraction `f` of `sorted_data`, linearly
// interpolating between the two neighbouring elements. The array is not
// sorted here; the caller is expected to pass it already sorted.
// The fractional position is f * (len - 1), so `f` is expected to lie in
// [0, 1]; values outside that interval lead to an out-of-range index.
// An empty array yields T(0).
// Reference: https://en.wikipedia.org/wiki/Quantile
pub fn quantile[T](sorted_data []T, f T) T {
	if sorted_data.len == 0 {
		return T(0)
	}
	position := f * (T(sorted_data.len) - 1.0)
	lower := int(position)
	weight := position - T(lower)
	if lower == sorted_data.len - 1 {
		// Already at the last element: there is no right neighbour to blend with.
		return sorted_data[lower]
	}
	return (1.0 - weight) * sorted_data[lower] + weight * sorted_data[lower + 1]
}
488