ClickHouse

Форк
0
/
AggregateFunctionStudentTTest.cpp 
100 строк · 3.3 Кб
1
#include <AggregateFunctions/AggregateFunctionFactory.h>
2
#include <AggregateFunctions/AggregateFunctionTTest.h>
3
#include <AggregateFunctions/FactoryHelpers.h>
4
#include <AggregateFunctions/Moments.h>
5

6

7
/// Error codes thrown by this translation unit.
/// They are declared inside namespace DB because every `ErrorCodes::...` use in this file
/// occurs inside DB; a global `::ErrorCodes` namespace would either be shadowed by a
/// `DB::ErrorCodes` coming from included headers or refer to symbols outside DB.
namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
}
12

13

14
namespace DB
15
{
16
struct Settings;
17

18
namespace
19
{
20

21
/** Student T-test applies to two samples of independent random variables
22
  * that have normal distributions with equal (but unknown) variances.
23
  * It allows one to answer the question of whether the means of the distributions differ.
24
  *
25
  * If variances are not considered equal, Welch T-test should be used instead.
26
  */
27
struct StudentTTestData : public TTestMoments<Float64>
{
    /// Name of the aggregate function this data structure belongs to.
    static constexpr auto name = "studentTTest";

    /// Both samples must be non-empty and together contain more than two values,
    /// which keeps the number of degrees of freedom (nx + ny - 2) positive.
    bool hasEnoughObservations() const
    {
        const bool both_samples_non_empty = nx > 0 && ny > 0;
        return both_samples_non_empty && nx + ny > 2;
    }

    /// Two means are estimated before the variance, so two degrees of freedom
    /// are subtracted from the total number of values.
    Float64 getDegreesOfFreedom() const
    {
        return nx + ny - 2;
    }

    /// Returns {t-statistic, two-sided p-value}.
    /// Both elements are NaN when the t-statistic is not finite.
    std::tuple<Float64, Float64> getResult() const
    {
        const Float64 x_mean = getMeanX();
        const Float64 y_mean = getMeanY();

        const Float64 dof = getDegreesOfFreedom();

        /// Pooled variance s^2. The textbook formula is
        /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
        /// Each sum of squared deviations is rewritten in terms of the accumulated
        /// moments (x1, x2, y1, y2), so the original sequences need not be stored.
        /// The square root of s^2 is never taken on its own: only s^2 enters the standard error.
        const Float64 x_squared_deviations = x2 + nx * x_mean * x_mean - 2 * x_mean * x1;
        const Float64 y_squared_deviations = y2 + ny * y_mean * y_mean - 2 * y_mean * y1;

        const Float64 pooled_variance = (x_squared_deviations + y_squared_deviations) / dof;
        const Float64 squared_std_error = pooled_variance * (1. / nx + 1. / ny);

        /// t-statistic: difference of the means divided by the standard error.
        const Float64 t_stat = (x_mean - y_mean) / std::sqrt(squared_std_error);

        if (unlikely(!std::isfinite(t_stat)))
        {
            constexpr Float64 nan = std::numeric_limits<Float64>::quiet_NaN();
            return {nan, nan};
        }

        /// Two-sided p-value: twice the CDF evaluated at the non-positive
        /// counterpart of the t-statistic.
        const boost::math::students_t_distribution<Float64> distribution(dof);
        const Float64 lower_tail_point = t_stat > 0 ? -t_stat : t_stat;
        const Float64 p_value = 2 * boost::math::cdf<Float64>(distribution, lower_tail_point);

        return {t_stat, p_value};
    }
};
78

79
/// Factory callback that builds the studentTTest aggregate function.
///
/// Validation happens in this order:
///  1. assertBinary(name, argument_types) - presumably rejects anything but two arguments; see FactoryHelpers.h.
///  2. At most one parameter is accepted, otherwise NUMBER_OF_ARGUMENTS_DOESNT_MATCH is thrown.
///  3. Both argument types must be numeric, otherwise BAD_ARGUMENTS is thrown.
/// The Settings pointer is ignored.
AggregateFunctionPtr createAggregateFunctionStudentTTest(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertBinary(name, argument_types);

    const bool too_many_parameters = parameters.size() > 1;
    if (too_many_parameters)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires zero or one parameter.", name);

    const bool both_arguments_numeric = isNumber(argument_types[0]) && isNumber(argument_types[1]);
    if (!both_arguments_numeric)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical types", name);

    using Function = AggregateFunctionTTest<StudentTTestData>;
    return std::make_shared<Function>(argument_types, parameters);
}
92

93
}
94

95
/// Registers the Student t-test aggregate function in the given factory
/// under the name "studentTTest".
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
{
    constexpr auto function_name = "studentTTest";
    factory.registerFunction(function_name, createAggregateFunctionStudentTTest);
}
99

100
}
101

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.