google-research

Форк
0
/
psychometric_utils.R 
295 строк · 7.9 Кб
1
# Setup -------------------------------------------------------------------
2
if (!require(pacman)) install.packages("pacman")
3

4
pacman::p_load("tidyverse", "tidyjson")
5

6

7
# Functions for Creating Keys ---------------------------------------------
8

9

10
# function to extract all scales from an admin session
11
extract_all_item_ids <- function(admin_session) {
12
  # Extracts a list of item_ids for all measures and subscales in the
13
  # admin_session object
14
  #
15
  # Args:
16
  #   admin_session: The admin_session object
17
  #
18
  # Returns:
19
  #   A nested list of item_ids for each measure and subscale
20

21
  item_ids <- list()
22

23
  # extract list of all measures in admin_session
24
  measures <- admin_session$..JSON[[1]]$measures
25

26
  # loop over each measure
27
  for (measure in names(measures)) {
28
    item_ids[[measure]] <- list()
29

30
    # loop over each subscale for the current measure
31
    for (
32
      subscale in names(admin_session$..JSON[[1]]$measures[[measure]]$scales)) {
33
      # extract item_ids for the current subscale
34
      item_ids[[measure]][[subscale]] <-
35
        measures[[measure]]$scales[[subscale]]$item_ids |>
36
        unlist()
37
    }
38
  }
39

40
  return(item_ids)
41
}
42

43
# function to extract all and reverse-keyed IDs from an admin session
44
extract_all_and_reversed_item_ids <- function(admin_session) {
45
  # Extracts a list of all and reverse-keyed item_ids for all measures and
46
  #   subscales inthe admin_session object.
47
  #
48
  # Args:
49
  #   admin_session: The admin_session object
50
  #
51
  # Returns:
52
  #   A nested list of reverse-keyed item_ids for each measure and subscale
53

54
  item_ids <- list()
55

56
  # extract list of all measures in admin_session
57
  measures <- admin_session$..JSON[[1]]$measures
58

59
  # loop over each measure
60
  for (measure in names(measures)) {
61
    item_ids[[measure]] <- list()
62

63
    # loop over each subscale for the current measure
64
    for (
65
      subscale in names(admin_session$..JSON[[1]]$measures[[measure]]$scales)) {
66
      # extract all item_ids for the current subscale
67
      item_ids[[measure]][[subscale]][["item_ids"]] <-
68
        measures[[measure]]$scales[[subscale]]$item_ids |>
69
        unlist()
70

71
      # extract reversed item_ids for the current subscale
72
      item_ids[[measure]][[subscale]][["reverse_keyed_item_ids"]] <-
73
        measures[[measure]]$scales[[subscale]]$reverse_keyed_item_ids |>
74
        unlist()
75
    }
76
  }
77

78
  return(item_ids)
79
}
80

81
# function to prepend "-" to reverse-keyed item IDs
82
key_item_ids <- function(item_ids, reverse_keyed_item_ids) {
83

84
  # inner function to key one item ID
85
  key_one_item_id <- function(item_id) {
86
    if (item_id %in% reverse_keyed_item_ids) {
87
      return(paste0("-", item_id))
88
    } else {
89
      return(item_id)
90
    }
91
  }
92

93
  keyed_item_ids <- lapply(item_ids, key_one_item_id) |> unlist()
94

95
  return(keyed_item_ids)
96
}
97

98
# function that reverse-keys all item IDs
99
create_keys <- function(all_nested_item_ids) {
100
  nested_key <- lapply(all_nested_item_ids, function(measure) {
101
    lapply(measure, function(subscale) {
102
      key_item_ids(subscale$item_ids, subscale$reverse_keyed_item_ids)
103
    })
104
  })
105
  return(nested_key)
106
}
107

108
# main function; converts admin_session to a nested key
109
admin_session_to_nested_key <- function(admin_session) {
110

111
  # extract all nested item_ids
112
  all_nested_item_ids <- extract_all_and_reversed_item_ids(admin_session)
113

114
  # add reverse-key syntax to these item_ids
115
  nested_key = create_keys(all_nested_item_ids)
116

117
  return(nested_key)
118
}
119

120

121

122
# Functions for Loading Data ----------------------------------------------
123

124

125
# Functions for Slicing Data ----------------------------------------------
126

127
data_for_model_id <- function(scored_session_df, model_id_str) {
128
  # Subsets a scored session by model_id.
129

130
  result <- scored_session_df |>
131
    filter(model_id == model_id_str) # nolint: object_usage_linter.
132

133
  return(result)
134
}
135

136

137
# Psychometric Functions --------------------------------------------------
138

139
subscale_reliability <- function(
140
    admin_session, df, measure, subscale, metric, min = 1, max = 5) {
141
  # Calls psych::scoreItems for a given subscale to retrieve Cronbach's Alpha.
142
  #
143
  # Uses min and max arguments to ensure proper reverse-keying.
144
  #
145
  # Returns:
146
  #   alpha: A float estimate of Cronbach's Alpha.
147

148
  # validate inputs
149
  metric <- match.arg(metric, c("alpha", "G6", "omega"))
150

151
  # get nested key
152
  nested_key <- admin_session_to_nested_key(admin_session)
153

154
  # get all item IDs
155
  all_item_ids <- extract_all_item_ids(admin_session)
156

157
  # get subscale item IDs
158
  subscale_item_ids <- all_item_ids[[measure]][[subscale]]
159

160
  # score subscale
161
  if (metric %in% c("alpha", "G6")) {
162
    score_info <- psych::scoreItems(
163
      keys = nested_key[[measure]][[subscale]],
164
      items = df |>
165
        dplyr::select(dplyr::all_of(all_item_ids[[measure]][[subscale]])),
166
      missing = FALSE,
167
      delete = TRUE,
168
      min = min,
169
      max = max
170
    )
171

172
    alpha <- score_info$alpha[1]
173
    G6 <- score_info$G6[1]
174

175
    if (metric == "alpha") {
176
      return(alpha)
177
    } else if (metric == "G6") {
178
      return(G6)
179
    }
180
  }
181

182
  if (metric == "omega") {
183
    omega_info <- psych::omega(
184
      df |> dplyr::select(dplyr::all_of(subscale_item_ids)) |>
185
        tidyr::drop_na(),
186
      plot = FALSE
187
    )
188
    omega <- omega_info |> purrr::pluck("omega.tot")
189

190
    return(omega)
191
  }
192
}
193

194
score_subscale <- function(
195
    admin_session, df, measure, subscale, min = 1, max = 5) {
196
  # Calls psych::scoreItems for a given subscale.
197
  #
198
  # Args:
199
  #   df: A DataFrame containing item-level response data.
200
  #   measure: String ID of measure.
201
  #   subscale: String ID of subscale.
202

203
  # get nested key
204
  nested_key = admin_session_to_nested_key(admin_session)
205

206
  # get all item IDs
207
  all_item_ids = extract_all_item_ids(admin_session)
208

209
  score_info <- psych::scoreItems(
210
    keys = nested_key[[measure]][[subscale]],
211
    items = df |> select(all_of(all_item_ids[[measure]][[subscale]])),
212
    missing = FALSE,
213
    delete = TRUE,
214
    min = min,
215
    max = max
216
  )
217

218
  alpha = score_info$alpha[1]
219
  G6 = score_info$G6[1]
220

221
  return(as.list(data.frame(alpha, G6)))
222
  # return(score_info$alpha |> pluck(1))
223
  # return(score_info)
224
}
225

226
score_measure <- function(admin_session, df, measure, min = 1, max = 5) {
227
  # Calls psych::scoreItems for a given subscale.
228
  #
229
  # Args:
230
  #   df: A DataFrame containing item-level response data.
231
  #   measure: String ID of measure.
232
  #   subscale: String ID of subscale.
233

234
  # get nested key
235
  nested_key = admin_session_to_nested_key(admin_session)
236

237
  # get all item IDs
238
  all_item_ids = extract_all_item_ids(admin_session)
239

240
  # measure IDs
241
  measure_item_ids = all_item_ids[[measure]] |>
242
    unlist(recursive = F) |> unname()
243

244
  score_info <- psych::scoreItems(
245
    keys = nested_key[[measure]],
246
    items = df |> select(all_of(measure_item_ids)),
247
    missing = FALSE,
248
    delete = TRUE,
249
    min = min,
250
    max = max
251
  )
252

253
  alpha = score_info$alpha
254
  G6 = score_info$G6
255

256
  return(score_info)
257
  # return(score_info$alpha |> pluck(1))
258
  # return(score_info)
259
}
260

261
compute_reliabilities <- function(admin_session, scored_session_df) {
262
  # get nested key
263
  nested_key = admin_session_to_nested_key(admin_session)
264

265
  # get all item IDs
266
  all_item_ids = extract_all_item_ids(admin_session)
267

268
  scale_reliabilities <-
269
    all_item_ids |> unlist(recursive = F) |>
270

271
    # map select df by scale sets of item_ids
272
    map(., function(x) scored_session_df |>
273
          dplyr::select(all_of(x)) |>
274

275
          drop_na() |>
276

277
          # assume each scale is unidimensional
278
          omega(nfactors = 1, plot = FALSE) |>
279

280
          # only keep alpha, G6, and Omega total
281
          keep(names(.) %in% c("alpha", "G6", "omega.tot"))) |>
282

283
    # set names of outputed lists to scale names
284
    set_names(names(nested_key |> unlist(recursive = F))) |>
285

286
    # convert to data.frame to keep row labels
287
    map(unlist) |>
288
    as.data.frame() |>
289

290
    # suppress warnings and messages (we are ignoring the other omegas anyway)
291
    suppressWarnings() |> suppressMessages() |>
292
    t() |> as.data.frame() |> rownames_to_column(var = "Scale")
293

294
  return(scale_reliabilities)
295
}

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.