google-research
295 строк · 7.9 Кб
1# Setup -------------------------------------------------------------------
2if (!require(pacman)) install.packages("pacman")
3
4pacman::p_load("tidyverse", "tidyjson")
5
6
7# Functions for Creating Keys ---------------------------------------------
8
9
10# function to extract all scales from an admin session
11extract_all_item_ids <- function(admin_session) {
12# Extracts a list of item_ids for all measures and subscales in the
13# admin_session object
14#
15# Args:
16# admin_session: The admin_session object
17#
18# Returns:
19# A nested list of item_ids for each measure and subscale
20
21item_ids <- list()
22
23# extract list of all measures in admin_session
24measures <- admin_session$..JSON[[1]]$measures
25
26# loop over each measure
27for (measure in names(measures)) {
28item_ids[[measure]] <- list()
29
30# loop over each subscale for the current measure
31for (
32subscale in names(admin_session$..JSON[[1]]$measures[[measure]]$scales)) {
33# extract item_ids for the current subscale
34item_ids[[measure]][[subscale]] <-
35measures[[measure]]$scales[[subscale]]$item_ids |>
36unlist()
37}
38}
39
40return(item_ids)
41}
42
43# function to extract all and reverse-keyed IDs from an admin session
44extract_all_and_reversed_item_ids <- function(admin_session) {
45# Extracts a list of all and reverse-keyed item_ids for all measures and
46# subscales inthe admin_session object.
47#
48# Args:
49# admin_session: The admin_session object
50#
51# Returns:
52# A nested list of reverse-keyed item_ids for each measure and subscale
53
54item_ids <- list()
55
56# extract list of all measures in admin_session
57measures <- admin_session$..JSON[[1]]$measures
58
59# loop over each measure
60for (measure in names(measures)) {
61item_ids[[measure]] <- list()
62
63# loop over each subscale for the current measure
64for (
65subscale in names(admin_session$..JSON[[1]]$measures[[measure]]$scales)) {
66# extract all item_ids for the current subscale
67item_ids[[measure]][[subscale]][["item_ids"]] <-
68measures[[measure]]$scales[[subscale]]$item_ids |>
69unlist()
70
71# extract reversed item_ids for the current subscale
72item_ids[[measure]][[subscale]][["reverse_keyed_item_ids"]] <-
73measures[[measure]]$scales[[subscale]]$reverse_keyed_item_ids |>
74unlist()
75}
76}
77
78return(item_ids)
79}
80
81# function to prepend "-" to reverse-keyed item IDs
82key_item_ids <- function(item_ids, reverse_keyed_item_ids) {
83
84# inner function to key one item ID
85key_one_item_id <- function(item_id) {
86if (item_id %in% reverse_keyed_item_ids) {
87return(paste0("-", item_id))
88} else {
89return(item_id)
90}
91}
92
93keyed_item_ids <- lapply(item_ids, key_one_item_id) |> unlist()
94
95return(keyed_item_ids)
96}
97
98# function that reverse-keys all item IDs
99create_keys <- function(all_nested_item_ids) {
100nested_key <- lapply(all_nested_item_ids, function(measure) {
101lapply(measure, function(subscale) {
102key_item_ids(subscale$item_ids, subscale$reverse_keyed_item_ids)
103})
104})
105return(nested_key)
106}
107
108# main function; converts admin_session to a nested key
109admin_session_to_nested_key <- function(admin_session) {
110
111# extract all nested item_ids
112all_nested_item_ids <- extract_all_and_reversed_item_ids(admin_session)
113
114# add reverse-key syntax to these item_ids
115nested_key = create_keys(all_nested_item_ids)
116
117return(nested_key)
118}
119
120
121
122# Functions for Loading Data ----------------------------------------------
123
124
125# Functions for Slicing Data ----------------------------------------------
126
127data_for_model_id <- function(scored_session_df, model_id_str) {
128# Subsets a scored session by model_id.
129
130result <- scored_session_df |>
131filter(model_id == model_id_str) # nolint: object_usage_linter.
132
133return(result)
134}
135
136
137# Psychometric Functions --------------------------------------------------
138
139subscale_reliability <- function(
140admin_session, df, measure, subscale, metric, min = 1, max = 5) {
141# Calls psych::scoreItems for a given subscale to retrieve Cronbach's Alpha.
142#
143# Uses min and max arguments to ensure proper reverse-keying.
144#
145# Returns:
146# alpha: A float estimate of Cronbach's Alpha.
147
148# validate inputs
149metric <- match.arg(metric, c("alpha", "G6", "omega"))
150
151# get nested key
152nested_key <- admin_session_to_nested_key(admin_session)
153
154# get all item IDs
155all_item_ids <- extract_all_item_ids(admin_session)
156
157# get subscale item IDs
158subscale_item_ids <- all_item_ids[[measure]][[subscale]]
159
160# score subscale
161if (metric %in% c("alpha", "G6")) {
162score_info <- psych::scoreItems(
163keys = nested_key[[measure]][[subscale]],
164items = df |>
165dplyr::select(dplyr::all_of(all_item_ids[[measure]][[subscale]])),
166missing = FALSE,
167delete = TRUE,
168min = min,
169max = max
170)
171
172alpha <- score_info$alpha[1]
173G6 <- score_info$G6[1]
174
175if (metric == "alpha") {
176return(alpha)
177} else if (metric == "G6") {
178return(G6)
179}
180}
181
182if (metric == "omega") {
183omega_info <- psych::omega(
184df |> dplyr::select(dplyr::all_of(subscale_item_ids)) |>
185tidyr::drop_na(),
186plot = FALSE
187)
188omega <- omega_info |> purrr::pluck("omega.tot")
189
190return(omega)
191}
192}
193
194score_subscale <- function(
195admin_session, df, measure, subscale, min = 1, max = 5) {
196# Calls psych::scoreItems for a given subscale.
197#
198# Args:
199# df: A DataFrame containing item-level response data.
200# measure: String ID of measure.
201# subscale: String ID of subscale.
202
203# get nested key
204nested_key = admin_session_to_nested_key(admin_session)
205
206# get all item IDs
207all_item_ids = extract_all_item_ids(admin_session)
208
209score_info <- psych::scoreItems(
210keys = nested_key[[measure]][[subscale]],
211items = df |> select(all_of(all_item_ids[[measure]][[subscale]])),
212missing = FALSE,
213delete = TRUE,
214min = min,
215max = max
216)
217
218alpha = score_info$alpha[1]
219G6 = score_info$G6[1]
220
221return(as.list(data.frame(alpha, G6)))
222# return(score_info$alpha |> pluck(1))
223# return(score_info)
224}
225
226score_measure <- function(admin_session, df, measure, min = 1, max = 5) {
227# Calls psych::scoreItems for a given subscale.
228#
229# Args:
230# df: A DataFrame containing item-level response data.
231# measure: String ID of measure.
232# subscale: String ID of subscale.
233
234# get nested key
235nested_key = admin_session_to_nested_key(admin_session)
236
237# get all item IDs
238all_item_ids = extract_all_item_ids(admin_session)
239
240# measure IDs
241measure_item_ids = all_item_ids[[measure]] |>
242unlist(recursive = F) |> unname()
243
244score_info <- psych::scoreItems(
245keys = nested_key[[measure]],
246items = df |> select(all_of(measure_item_ids)),
247missing = FALSE,
248delete = TRUE,
249min = min,
250max = max
251)
252
253alpha = score_info$alpha
254G6 = score_info$G6
255
256return(score_info)
257# return(score_info$alpha |> pluck(1))
258# return(score_info)
259}
260
261compute_reliabilities <- function(admin_session, scored_session_df) {
262# get nested key
263nested_key = admin_session_to_nested_key(admin_session)
264
265# get all item IDs
266all_item_ids = extract_all_item_ids(admin_session)
267
268scale_reliabilities <-
269all_item_ids |> unlist(recursive = F) |>
270
271# map select df by scale sets of item_ids
272map(., function(x) scored_session_df |>
273dplyr::select(all_of(x)) |>
274
275drop_na() |>
276
277# assume each scale is unidimensional
278omega(nfactors = 1, plot = FALSE) |>
279
280# only keep alpha, G6, and Omega total
281keep(names(.) %in% c("alpha", "G6", "omega.tot"))) |>
282
283# set names of outputed lists to scale names
284set_names(names(nested_key |> unlist(recursive = F))) |>
285
286# convert to data.frame to keep row labels
287map(unlist) |>
288as.data.frame() |>
289
290# suppress warnings and messages (we are ignoring the other omegas anyway)
291suppressWarnings() |> suppressMessages() |>
292t() |> as.data.frame() |> rownames_to_column(var = "Scale")
293
294return(scale_reliabilities)
295}