v

stats.v
487 строк · 11.3 Кб
Перенос по словам
1
module stats
2

3
import math
4

5
// freq counts how many elements of `data` compare equal to `val`.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq[T](data []T, val T) int {
	mut occurrences := 0
	// An empty array simply skips the loop and reports 0.
	for i in 0 .. data.len {
		if data[i] == val {
			occurrences++
		}
	}
	return occurrences
}
21

22
// mean returns the arithmetic average of `data`, i.e. sum(data) / data.len.
// The sum and the division are carried out in type T.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean[T](data []T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut total := T(0)
	for i in 0 .. n {
		total += data[i]
	}
	return total / T(n)
}
36

37
// geometric_mean returns the n-th root of the product of all elements,
// i.e. product(data) ** (1 / data.len).
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean[T](data []T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	// Running product of every element, starting from the neutral element 1.0.
	mut product := 1.0
	for i in 0 .. n {
		product *= data[i]
	}
	exponent := 1.0 / T(n)
	return math.pow(product, exponent)
}
51

52
// harmonic_mean returns data.len divided by the sum of the reciprocals
// of the elements of `data`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean[T](data []T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut reciprocal_sum := T(0)
	for i in 0 .. n {
		reciprocal_sum += 1.0 / data[i]
	}
	return T(n) / reciprocal_sum
}
66

67
// median returns the middle value of `sorted_data`. The array is not sorted
// here; the caller is expected to pass it already sorted.
// For an even number of elements the two central values are averaged.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median[T](sorted_data []T) T {
	n := sorted_data.len
	if n == 0 {
		return T(0)
	}
	half := n / 2
	if n % 2 == 1 {
		// Odd length: a single central element exists.
		return sorted_data[half]
	}
	// Even length: average the two elements around the centre.
	return (sorted_data[half - 1] + sorted_data[half]) / T(2)
}
81

82
// mode returns the most frequently occurring value of `data`.
// When several values share the highest frequency, the one that appears
// first in `data` is returned.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut best_index := 0
	mut best_count := 0
	for i, candidate in data {
		occurrences := freq(data, candidate)
		// Strict comparison keeps the earliest element on ties.
		if occurrences > best_count {
			best_count = occurrences
			best_index = i
		}
	}
	return data[best_index]
}
101

102
// rms (Root Mean Square) returns the square root of the mean of the squared
// elements of `data`.
// An empty array yields T(0).
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms[T](data []T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut squares := T(0)
	for i in 0 .. n {
		squares += math.pow(data[i], 2)
	}
	mean_square := squares / T(n)
	return math.sqrt(mean_square)
}
115

116
// population_variance measures the dispersion of `data` around its own mean.
// It computes the mean with `mean` and delegates to `population_variance_mean`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_variance[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	return population_variance_mean[T](data, mean[T](data))
}
128

129
// population_variance_mean measures the dispersion of `data` around the
// provided `mean`: the sum of squared deviations divided by data.len.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance_mean[T](data []T, mean T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut squared_deviations := T(0)
	for i in 0 .. n {
		deviation := data[i] - mean
		squared_deviations += deviation * deviation
	}
	return squared_deviations / T(n)
}
143

144
// sample_variance measures the spread of `data` around its own mean.
// It computes the mean with `mean` and delegates to `sample_variance_mean`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_variance[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	return sample_variance_mean[T](data, mean[T](data))
}
155

156
// sample_variance_mean measures the spread of `data` around the provided
// `mean`: the sum of squared deviations divided by `data.len - 1`.
// Arrays with fewer than two elements yield T(0), because the divisor
// `data.len - 1` would otherwise be zero.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance_mean[T](data []T, mean T) T {
	// With a single element `data.len - 1` is 0; return before dividing by it.
	if data.len < 2 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += (v - mean) * (v - mean)
	}
	return sum / T(data.len - 1)
}
169

170
// population_stddev returns the square root of the population variance of
// `data`, as computed by `population_variance`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	// Take the root in f64 and cast back to T, the same way the other
	// *_stddev functions in this module do.
	return T(math.sqrt(f64(population_variance[T](data))))
}
180

181
// population_stddev_mean returns the square root of the population variance
// of `data` taken around the provided `mean`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	variance := population_variance_mean[T](data, mean)
	root := math.sqrt(f64(variance))
	return T(root)
}
191

192
// sample_stddev returns the sample standard deviation of `data`: the square
// root of the value computed by `sample_variance`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	variance := sample_variance[T](data)
	root := math.sqrt(f64(variance))
	return T(root)
}
203

204
// sample_stddev_mean returns the sample standard deviation of `data` around
// the provided `mean`: the square root of the value computed by
// `sample_variance_mean`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	variance := sample_variance_mean[T](data, mean)
	root := math.sqrt(f64(variance))
	return T(root)
}
215

216
// absdev returns the average absolute distance between each element of `data`
// and the mean of `data`. It computes the mean with `mean` and delegates to
// `absdev_mean`.
// An empty array yields T(0).
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
@[inline]
pub fn absdev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	return absdev_mean[T](data, mean[T](data))
}
227

228
// absdev_mean returns the average absolute distance between each element of
// `data` and the provided `mean`.
// An empty array yields T(0).
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn absdev_mean[T](data []T, mean T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut total_distance := T(0)
	for i in 0 .. n {
		total_distance += math.abs(data[i] - mean)
	}
	return total_distance / T(n)
}
241

242
// tss (total sum of squares) returns the sum of the squared differences
// between each element of `data` and the mean of `data`. It computes the mean
// with `mean` and delegates to `tss_mean`.
// An empty array yields T(0).
@[inline]
pub fn tss[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	return tss_mean[T](data, mean[T](data))
}
251

252
// tss_mean (total sum of squares) returns the sum of the squared differences
// between each element of `data` and the provided `mean`.
// An empty array yields T(0).
pub fn tss_mean[T](data []T, mean T) T {
	n := data.len
	if n == 0 {
		return T(0)
	}
	mut total := T(0)
	for i in 0 .. n {
		deviation := data[i] - mean
		total += deviation * deviation
	}
	return total
}
263

264
// min returns the smallest element of `data`.
// An empty array yields T(0).
pub fn min[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut lowest := data[0]
	// The first element is already the current candidate, so start at index 1.
	for i in 1 .. data.len {
		if data[i] < lowest {
			lowest = data[i]
		}
	}
	return lowest
}
277

278
// max returns the largest element of `data`.
// An empty array yields T(0).
pub fn max[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut highest := data[0]
	// The first element is already the current candidate, so start at index 1.
	for i in 1 .. data.len {
		if data[i] > highest {
			highest = data[i]
		}
	}
	return highest
}
291

292
// minmax returns the smallest and the largest element of `data`, in that
// order, found in a single pass.
// An empty array yields T(0), T(0).
pub fn minmax[T](data []T) (T, T) {
	if data.len == 0 {
		return T(0), T(0)
	}
	mut lowest := data[0]
	mut highest := data[0]
	for i in 1 .. data.len {
		current := data[i]
		if current > highest {
			highest = current
		}
		if current < lowest {
			lowest = current
		}
	}
	return lowest, highest
}
309

310
// min_index returns the index of the first occurrence of the smallest
// element of `data`.
// An empty array yields 0.
pub fn min_index[T](data []T) int {
	if data.len == 0 {
		return 0
	}
	mut best := 0
	for i in 1 .. data.len {
		// Strict comparison keeps the earliest index on ties.
		if data[i] < data[best] {
			best = i
		}
	}
	return best
}
325

326
// max_index returns the index of the first occurrence of the largest
// element of `data`.
// An empty array yields 0.
pub fn max_index[T](data []T) int {
	if data.len == 0 {
		return 0
	}
	mut best := 0
	for i in 1 .. data.len {
		// Strict comparison keeps the earliest index on ties.
		if data[i] > data[best] {
			best = i
		}
	}
	return best
}
341

342
// minmax_index returns the index of the first occurrence of the smallest
// element and the index of the first occurrence of the largest element of
// `data`, in that order.
// An empty array yields 0, 0.
pub fn minmax_index[T](data []T) (int, int) {
	if data.len == 0 {
		return 0, 0
	}
	mut lowest_at := 0
	mut highest_at := 0
	for i in 1 .. data.len {
		// Strict comparisons keep the earliest index on ties.
		if data[i] < data[lowest_at] {
			lowest_at = i
		}
		if data[i] > data[highest_at] {
			highest_at = i
		}
	}
	return lowest_at, highest_at
}
363

364
// range returns the difference between the largest and the smallest element
// of `data` (maximum - minimum), both obtained from `minmax`.
// An empty array yields T(0).
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	lowest, highest := minmax[T](data)
	return highest - lowest
}
375

376
// covariance measures the directional association between two datasets.
// A positive value means the variables move in the same direction, a negative
// value means they move in opposite directions.
// It computes the mean of each dataset with `mean` and delegates to
// `covariance_mean`.
@[inline]
pub fn covariance[T](data1 []T, data2 []T) T {
	return covariance_mean[T](data1, data2, mean[T](data1), mean[T](data2))
}
384

385
// covariance_mean computes the covariance of two datasets whose means are
// provided by the caller. Only the first min(data1.len, data2.len) pairs are
// used. The result is built with a recurrence relation that keeps a running
// mean of the products of deviations.
// If either array is empty the result is T(0).
pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
	// Number of pairs available in both datasets.
	pairs := if data1.len < data2.len { data1.len } else { data2.len }
	if pairs == 0 {
		return T(0)
	}
	mut running := T(0)
	for i := 0; i < pairs; i++ {
		dev1 := data1[i] - mean1
		dev2 := data2[i] - mean2
		running += (dev1 * dev2 - running) / (T(i) + 1.0)
	}
	return running
}
400

401
// lag1_autocorrelation calculates the correlation between values of `data`
// that are one position (one time period) apart.
// It computes the mean with `mean` and delegates to
// `lag1_autocorrelation_mean`.
@[inline]
pub fn lag1_autocorrelation[T](data []T) T {
	return lag1_autocorrelation_mean[T](data, mean[T](data))
}
408

409
// lag1_autocorrelation_mean calculates the correlation between values of
// `data` that are one position (one time period) apart, using the provided
// `mean`. Both the lag-1 covariance `q` and the variance `v` are built with
// a recurrence relation, and the result is `q / v`.
// An empty array yields T(0). Note that `v` is 0 when every element equals
// `mean` (including a single-element array), so the final division is then
// a division by zero.
pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut q := T(0)
	// Seed the running variance with the squared deviation of the first
	// element, so that `v` ends up as the mean of all squared deviations.
	first := data[0] - mean
	mut v := first * first
	for i := 1; i < data.len; i++ {
		delta0 := data[i - 1] - mean
		delta1 := data[i] - mean
		q += (delta0 * delta1 - q) / (T(i) + 1.0)
		v += (delta1 * delta1 - v) / (T(i) + 1.0)
	}
	return q / v
}
426

427
// kurtosis measures the 'tailedness' of `data`. It computes the mean with
// `mean` and the standard deviation with `population_stddev_mean`, then
// delegates to `kurtosis_mean_stddev`.
@[inline]
pub fn kurtosis[T](data []T) T {
	center := mean[T](data)
	spread := population_stddev_mean[T](data, center)
	return kurtosis_mean_stddev[T](data, center, spread)
}
434

435
// kurtosis_mean_stddev measures the 'tailedness' of `data` from the provided
// `mean` and standard deviation `sd`: the mean of the fourth power of the
// deviations normalized by `sd`, minus 3.
pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
	// Running mean of the normalized deviations raised to the fourth power.
	// A recurrence relation updates it element by element, so no large
	// intermediate sum that could overflow is ever built.
	mut fourth_moment := T(0)
	for i in 0 .. data.len {
		z := (data[i] - mean) / sd
		fourth_moment += (z * z * z * z - fourth_moment) / (T(i) + 1.0)
	}
	return fourth_moment - T(3.0)
}
449

450
// skew measures the skewness of `data`. It computes the mean with `mean` and
// the standard deviation with `population_stddev_mean`, then delegates to
// `skew_mean_stddev`.
@[inline]
pub fn skew[T](data []T) T {
	center := mean[T](data)
	spread := population_stddev_mean[T](data, center)
	return skew_mean_stddev[T](data, center, spread)
}
457

458
// skew_mean_stddev calculates the skewness of `data` from the provided `mean`
// and standard deviation `sd`: the mean of the cubed deviations normalized
// by `sd`.
pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
	// Running mean of the cubed normalized deviations.
	// A recurrence relation updates it element by element, so no large
	// intermediate sum that could overflow is ever built.
	mut third_moment := T(0)
	for i in 0 .. data.len {
		z := (data[i] - mean) / sd
		third_moment += (z * z * z - third_moment) / (T(i) + 1.0)
	}
	return third_moment
}
471

472
// quantile returns the value at fraction `f` of `sorted_data`, linearly
// interpolating between the two neighbouring elements when the position
// `f * (sorted_data.len - 1)` is not a whole number.
// The array is not sorted here and `f` is not range checked.
// An empty array yields T(0).
// For more reference
// https://en.wikipedia.org/wiki/Quantile
pub fn quantile[T](sorted_data []T, f T) T {
	n := sorted_data.len
	if n == 0 {
		return T(0)
	}
	position := f * (T(n) - 1.0)
	lower := int(position)
	if lower == n - 1 {
		// Exactly on the last element: there is no right neighbour to blend with.
		return sorted_data[lower]
	}
	fraction := position - T(lower)
	return (1.0 - fraction) * sorted_data[lower] + fraction * sorted_data[(lower + 1)]
}
488
v

Использование cookies