library(shiny)
library(tidyverse)
library(ggplot2)
library(plotly)
library(GGally)
library(here)
library(hablar)
library(janitor)
library(naniar)
library(ComplexUpset)
library(gt)
This document goes over score calculation and missing data imputation for each psychosocial form. The headings in the sidebar help the user navigate to their desired content. The code chunks for each form can be run independently after running Section D.1 in its entirety.
D.2 Scoring and Imputation
D.2.1 The Brief Pain Inventory (BPI)
D.2.1.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the BPI data and keep completed forms, we will call this BPI
<- applyFilter(
BPI "BPI",
bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete )
D.2.1.2 Components:
BPI assesses pain and includes information on:
Widespread body pain measured by Michigan Body Map
General pain intensity measured by Brief pain intensity -whole body pain
Local pain intensity measured by Modified BPI – surgical site pain
Split BPI: We will subset the BPI data into three datasets according to the above information.
<- BPI %>%
bpi_body_map select(
record_id,
guid,
redcap_event_name,
redcap_data_access_group,
cohort,
bpipainanatsiteareatxt,ends_with("rate"),
ends_with("dur"),
bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete
)
<- BPI %>%
bpi_body_pain select(
record_id,
guid,
redcap_event_name,
redcap_data_access_group,
cohort,
bpiworstpainratingexclss,
bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete
)
<- BPI %>%
bpi_pain_intrf select(
record_id,
guid,
redcap_event_name,
redcap_data_access_group,
cohort,
bpiworstpainratingss,
bpipainintfrgnrlactvtyscl,contains("intfr"),
contains("intrfr"),
bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete )
We now have three datasets:
bpi_body_map for widespread body pain - Michigan Body Map
bpi_body_pain for general pain intensity measured by brief pain intensity -whole body pain
bpi_pain_intrf for local pain intensity measured by modified BPI – surgical site pain
Inspect Field names: Make sure that the three datasets have the field names of interest
names(bpi_body_map)
[1] "record_id"
[2] "guid"
[3] "redcap_event_name"
[4] "redcap_data_access_group"
[5] "cohort"
[6] "bpipainanatsiteareatxt"
[7] "bpi_mbm_z1_rate"
[8] "bpi_mbm_z2_rate"
[9] "bpi_mbm_z3_rate"
[10] "bpi_mbm_z4_rate"
[11] "bpi_mbm_z5_rate"
[12] "bpi_mbm_z6_rate"
[13] "bpi_mbm_z7_rate"
[14] "bpi_mbm_z8_rate"
[15] "bpi_mbm_z9_rate"
[16] "bpi_mbm_z1_dur"
[17] "bpi_mbm_z2_dur"
[18] "bpi_mbm_z3_dur"
[19] "bpi_mbm_z4_dur"
[20] "bpi_mbm_z5_dur"
[21] "bpi_mbm_z6_dur"
[22] "bpi_mbm_z7_dur"
[23] "bpi_mbm_z8_dur"
[24] "bpi_mbm_z9_dur"
[25] "bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete"
names(bpi_body_pain)
[1] "record_id"
[2] "guid"
[3] "redcap_event_name"
[4] "redcap_data_access_group"
[5] "cohort"
[6] "bpiworstpainratingexclss"
[7] "bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete"
names(bpi_pain_intrf)
[1] "record_id"
[2] "guid"
[3] "redcap_event_name"
[4] "redcap_data_access_group"
[5] "cohort"
[6] "bpiworstpainratingss"
[7] "bpipainintfrgnrlactvtyscl"
[8] "bpipainintfrmoodscl"
[9] "bpipainintfrwlkablscl"
[10] "bpipainnrmlwrkintrfrscl"
[11] "bpipainrelationsintrfrscl"
[12] "bpipainsleepintrfrscl"
[13] "bpipainenjoymntintrfrscl"
[14] "bpipainintrfrscore"
[15] "bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete"
All three datasets have the desired fields names. We will now look at missing data pattern in each dataset:
D.2.2 Michigan Body Map:
Michigan Body Map records pain intensity and duration for the body region indicated by the subject (Berardi et al., 2022). The painful body regions can vary from subject to subject, which can result in null values in field names for body regions not indicated by the subject, this explains the data sparsity.
D.2.2.1 Missing Data:
Missing data pattern in bpi_body_map
gg_miss_upset(
bpi_body_map,nsets = n_var_miss(bpi_body_map),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will create a separate field name for each body region and assign a value of 0 if a subject has not selected a body region and has not indicated the duration and intensity of pain for the selected body region i.e. has not experienced any pain in the selected body region, assign 1 if a subject has indicated painful region(s) as well as specified the rate and duration,and 2 if either value (rate or duration) or none of the values are present for the selected body regions or if either value ( rate or duration) is available but the body region is not specified.
<- bpi_body_map %>%
bpi_body_map mutate(
head_face_jaw = case_when(
grepl("(a00|a01|a02|a25)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
neck = case_when((grepl("a26", bpipainanatsiteareatxt)) ~ 1, TRUE ~ 0)
%>%
) mutate(
chest_or_breast = case_when(
grepl("(a03|a04)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
abd_pelvis_groin = case_when(
grepl("(a13|a14|a15|a16)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
right_shoulder_arm_wrist = case_when(
grepl("(a28|a05|a07|a09|a11)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
left_shoulder_arm_wrist = case_when(
grepl("(a27|a06|a08|a10|a12)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
back_buttocks = case_when(
grepl("(a29|a30|a33|a34)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
right_hip_leg_foot = case_when(
grepl("(a32|a17|a19|a21|a23)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
left_hip_leg_foot = case_when(
grepl("(a31|a18|a20|a22|a24)", bpipainanatsiteareatxt)) ~ 1,
(TRUE ~ 0
)%>%
) mutate(
head_face_jaw_m = case_when(
== 0 & is.na(bpi_mbm_z1_rate) & is.na(bpi_mbm_z1_dur) ~ 0,
head_face_jaw == 1 & !is.na(bpi_mbm_z1_rate) & !is.na(bpi_mbm_z1_dur) ~ 1,
head_face_jaw TRUE ~ 2
)%>%
) mutate(
neck_m = case_when(
== 0 & is.na(bpi_mbm_z2_rate) & is.na(bpi_mbm_z2_dur) ~ 0,
neck == 1 & !is.na(bpi_mbm_z2_rate) & !is.na(bpi_mbm_z2_dur) ~ 1,
neck TRUE ~ 2
)%>%
) mutate(
chest_or_breast_m = case_when(
== 0 & is.na(bpi_mbm_z3_rate) & is.na(bpi_mbm_z3_dur) ~ 0,
chest_or_breast == 1 & !is.na(bpi_mbm_z3_rate) & !is.na(bpi_mbm_z3_dur) ~
chest_or_breast 1,
TRUE ~ 2
)%>%
) mutate(
abd_pelvis_groin_m = case_when(
== 0 & is.na(bpi_mbm_z4_rate) & is.na(bpi_mbm_z4_dur) ~
abd_pelvis_groin 0,
== 1 & !is.na(bpi_mbm_z4_rate) & !is.na(bpi_mbm_z4_dur) ~
abd_pelvis_groin 1,
TRUE ~ 2
)%>%
) mutate(
right_shoulder_arm_wrist_m = case_when(
== 0 &
right_shoulder_arm_wrist is.na(bpi_mbm_z5_rate) &
is.na(bpi_mbm_z5_dur) ~
0,
== 1 &
right_shoulder_arm_wrist !is.na(bpi_mbm_z5_rate) &
!is.na(bpi_mbm_z5_dur) ~
1,
TRUE ~ 2
)%>%
) mutate(
left_shoulder_arm_wrist_m = case_when(
== 0 &
left_shoulder_arm_wrist is.na(bpi_mbm_z6_rate) &
is.na(bpi_mbm_z6_dur) ~
0,
== 1 &
left_shoulder_arm_wrist !is.na(bpi_mbm_z6_rate) &
!is.na(bpi_mbm_z6_dur) ~
1,
TRUE ~ 2
)%>%
) mutate(
back_buttocks_m = case_when(
== 0 & is.na(bpi_mbm_z7_rate) & is.na(bpi_mbm_z7_dur) ~ 0,
back_buttocks == 1 & !is.na(bpi_mbm_z7_rate) & !is.na(bpi_mbm_z7_dur) ~ 1,
back_buttocks TRUE ~ 2
)%>%
) mutate(
right_hip_leg_foot_m = case_when(
== 0 & is.na(bpi_mbm_z8_rate) & is.na(bpi_mbm_z8_dur) ~
right_hip_leg_foot 0,
== 1 &
right_hip_leg_foot !is.na(bpi_mbm_z8_rate) &
!is.na(bpi_mbm_z8_dur) ~
1,
TRUE ~ 2
)%>%
) mutate(
left_hip_leg_foot_m = case_when(
== 0 & is.na(bpi_mbm_z9_rate) & is.na(bpi_mbm_z9_dur) ~
left_hip_leg_foot 0,
== 1 &
left_hip_leg_foot !is.na(bpi_mbm_z9_rate) &
!is.na(bpi_mbm_z9_dur) ~
1,
TRUE ~ 2
)%>%
) mutate(across(ends_with("_m"), as.factor)) %>%
relocate(
record_id,
guid,
redcap_event_name,
redcap_data_access_group,
bpipainanatsiteareatxt,
head_face_jaw,
head_face_jaw_m,
bpi_mbm_z1_rate,
bpi_mbm_z1_dur,
neck,
neck_m,
bpi_mbm_z2_rate,
bpi_mbm_z2_dur,
chest_or_breast,
chest_or_breast_m,
bpi_mbm_z3_rate,
bpi_mbm_z3_dur,
abd_pelvis_groin,
abd_pelvis_groin_m,
bpi_mbm_z4_rate,
bpi_mbm_z4_dur,
right_shoulder_arm_wrist,
right_shoulder_arm_wrist_m,
bpi_mbm_z5_rate,
bpi_mbm_z5_dur,
left_shoulder_arm_wrist,
left_shoulder_arm_wrist_m,
bpi_mbm_z6_rate,
bpi_mbm_z6_dur,
back_buttocks,
back_buttocks_m,
bpi_mbm_z7_rate,
bpi_mbm_z7_dur,
right_hip_leg_foot,
right_hip_leg_foot_m,
bpi_mbm_z8_rate,
bpi_mbm_z8_dur,
left_hip_leg_foot,
left_hip_leg_foot_m,
bpi_mbm_z9_rate,
bpi_mbm_z9_dur )
Calculate number of pain areas for each subject and name it “number_of_pain_areas”
<- c(
body_vector "head_face_jaw",
"neck",
"chest_or_breast",
"abd_pelvis_groin",
"right_shoulder_arm_wrist",
"left_shoulder_arm_wrist",
"back_buttocks",
"right_hip_leg_foot",
"left_hip_leg_foot"
)
<- bpi_body_map %>%
bpi_body_map rowwise() %>%
mutate(
number_of_pain_areas = rowSums(across(all_of(body_vector)), na.rm = TRUE)
)
D.2.2.2 New field name(s)
Add new field names for the michigan body map to the data dictionary
# Create new field names
# head face Jaw
<- data.frame(
jawm_new_row field_name = "head_face_jaw_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected the head/face/jaw and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
jaw_new_row field_name = "head_face_jaw",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected head/face/jaw |1,Subject has selected head/face/jaw"
)
# neck
<- data.frame(
neckm_new_row field_name = "neck_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected neck and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
neck_new_row field_name = "neck",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected neck |1,Subject has selected neck"
)
# chest_or_breast
<- data.frame(
chest_or_breastm_new_row field_name = "chest_or_breast_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected chest_or_breast and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
chest_or_breast_new_row field_name = "chest_or_breast",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected chest_or_breast |1,Subject has selected chest_or_breast"
)
# abd_pelvis_groin
<- data.frame(
abd_pelvis_groinm_new_row field_name = "abd_pelvis_groin_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected abd_pelvis_groin and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
abd_pelvis_groin_new_row field_name = "abd_pelvis_groin",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected abd_pelvis_groin |1,Subject has selected abd_pelvis_groin"
)
# right_shoulder_arm_wrist
<- data.frame(
right_shoulder_arm_wristm_new_row field_name = "right_shoulder_arm_wrist_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected right_shoulder_arm_wrist and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
right_shoulder_arm_wrist_new_row field_name = "right_shoulder_arm_wrist",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected right_shoulder_arm_wrist |1,Subject has selected right_shoulder_arm_wrist"
)
# left_shoulder_arm_wrist
<- data.frame(
left_shoulder_arm_wristm_new_row field_name = "left_shoulder_arm_wrist_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected left_shoulder_arm_wrist and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
left_shoulder_arm_wrist_new_row field_name = "left_shoulder_arm_wrist",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected left_shoulder_arm_wrist |1,Subject has selected left_shoulder_arm_wrist"
)
# back_buttocks
<- data.frame(
back_buttocksm_new_row field_name = "back_buttocks_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected back_buttocks and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
back_buttocks_new_row field_name = "back_buttocks",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected back_buttocks |1,Subject has selected back_buttocks"
)
# right_hip_leg_foot
<- data.frame(
right_hip_leg_footm_new_row field_name = "right_hip_leg_foot_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected right_hip_leg_foot and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
right_hip_leg_foot_new_row field_name = "right_hip_leg_foot",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected right_hip_leg_foot |1,Subject has selected right_hip_leg_foot"
)
# left_hip_leg_foot
<- data.frame(
left_hip_leg_footm_new_row field_name = "left_hip_leg_foot_m",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, Subject has not selected left_hip_leg_foot and has not indicated the duration and intensity of pain for the selected body region|1,Subject has indicated painful region as well as specified the rate and duration|2, if either value ( rate and duration) or none of the values are present for the selected body region or if either value ( rate and duration) is available but the body region is not specified"
)
<- data.frame(
left_hip_leg_foot_new_row field_name = "left_hip_leg_foot",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "0, Subject has not selected left_hip_leg_foot |1,Subject has selected left_hip_leg_foot"
)
# number_of_pain_areas
<- data.frame(
pain_areas_new_row field_name = "number_of_pain_areas",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "Sum of pain areas for each subject"
)
# Add new rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!jawm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!jaw_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!neckm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!neck_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!chest_or_breastm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!chest_or_breast_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!abd_pelvis_groinm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!abd_pelvis_groin_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!right_shoulder_arm_wristm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!right_shoulder_arm_wrist_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!left_shoulder_arm_wristm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!left_shoulder_arm_wrist_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!back_buttocksm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!back_buttocks_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!right_hip_leg_footm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!right_hip_leg_foot_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!left_hip_leg_footm_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!left_hip_leg_foot_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pain_areas_new_row,
.after = match("bpi_mbm_z9_dur", psy_soc_dict$field_name)
)
D.2.2.3 Save:
Create a folder for recoded BPI data and save “bpi_bpdy_map” and updated data dictionary as .csv files
write_csv(
bpi_body_map,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_bpi_body_map.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.3 Brief pain intensity -whole body pain
Brief pain intensity -whole body pain assesses other pain intensity (excluding surgical site) in the last 24 hours.
D.2.3.1 Missing Data:
Missing data pattern in bpi_body_pain
%>%
bpi_body_pain miss_var_summary()
variable | n_miss | pct_miss |
---|---|---|
bpiworstpainratingexclss | 44 | 3.29 |
record_id | 0 | 0 |
guid | 0 | 0 |
redcap_event_name | 0 | 0 |
redcap_data_access_group | 0 | 0 |
cohort | 0 | 0 |
bpisf_the_brief_pain_inventory_v23_short_form_bpi_complete | 0 | 0 |
There are 32 observations (4.12%) with no response to the question i.e null values for the field name “bpiworstpainratingexclss”. We will create a variable “body_pain_intensity” and assign a value of 0 if a subject has not responded to the question, assign 1 if a subject has responded to the question
<- bpi_body_pain %>%
bpi_body_pain mutate(body_pain_intensity = ifelse(is.na(bpiworstpainratingexclss), 0, 1))
We will now pass the field name “bpi_body_pain” as an argument to the table function to look at the frequencies.
table(bpi_body_pain$body_pain_intensity, exclude = FALSE)
0 1
44 1293
D.2.3.2 New field name(s)
Add the field name “body_pain_intensity” to the data dictionary
# Create field names
<- data.frame(
bp_in_new_row field_name = "body_pain_intensity",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "0, No body pain intensity specified |1, body pain intensity specified"
)
# Add the new row
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bp_in_new_row,
.after = match("bpipainintrfrscore", psy_soc_dict$field_name)
)
D.2.3.3 Save:
Save “bpi_body_pain” and updated data dictionary as .csv files in the folder named “reformatted_bpi”
write_csv(
bpi_body_pain,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_bpi_body_pain.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.4 Modified BPI – surgical site pain:
Modified BPI – surgical site pain records pain intensity and interference. The primary outcome is the worst pain intensity at the surgical site i.e field name “bpiworstpainratingss” over the past 24 hours,using a scale of 0 to 10. The form also includes the pain interference sub-scale (“Cleeland CS Ryan KM. Pain Assessment,” 1995), which assesses the level of interference in the subject’s daily activities over the past 24 hours (Berardi et al., 2022) due to surgical site pain. Pain interference is scored as the mean of seven interference items, in case of missing data, this mean can still be used if there is a response to at least 4 of 7 items (BPI user guide).
D.2.4.1 Missing Data:
Missing data pattern in bpi_pain_intrf if bpiworstpainratingss is not 0
gg_miss_upset(
%>%
bpi_pain_intrf filter(bpiworstpainratingss != 0),
nsets = n_var_miss(bpi_pain_intrf),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will create a variable “pain_intrf” and assign a value of 1 if worst pain intensity at the surgical site is available and at least 4 of 7 pain interference item are available, 2 if response to worst pain intensity at the surgical site is available and less than or equal to 3 of 7 pain interference items are available, 3 if there is no response to worst pain intensity at the surgical site but at least 4 out 7 pain interference items are available, 4 if there is no response to worst pain intensity at the surgical site and less than or equal to 3 of 7 pain interference items are available.
<- bpi_pain_intrf %>%
bpi_pain_intrf mutate(
intrf_not_na = rowSums(
!is.na(select(., "bpipainintfrgnrlactvtyscl":"bpipainenjoymntintrfrscl"))
)%>%
) mutate(
pain_intrf = case_when(
!is.na(bpiworstpainratingss) & intrf_not_na >= 4 ~ 1,
!is.na(bpiworstpainratingss) & intrf_not_na <= 3 ~ 2,
is.na(bpiworstpainratingss) & intrf_not_na >= 4 ~ 3,
is.na(bpiworstpainratingss) & intrf_not_na <= 3 ~ 4,
TRUE ~ 0
)%>%
) select(-intrf_not_na) %>%
as_tibble() %>%
mutate(pain_intrf = as.factor(pain_intrf))
We will create a new field “new_bpipainintrfrscore” with pain interference scores for observations that meet the conditions pain_intrf = 1 or pain_intrf = 3.
<- bpi_pain_intrf %>%
bpi_pain_intrf mutate(
new_bpipainintrfrscore = case_when(
== 1 | pain_intrf == 3 ~ bpipainintrfrscore
pain_intrf
) )
Compare the distributions for bpipainintrfrscore and new_bpipainintrfrscore
summary(bpi_pain_intrf$new_bpipainintrfrscore)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.8571 3.4286 3.5069 5.5714 10.0000 40
summary(bpi_pain_intrf$bpipainintrfrscore)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.8571 3.4286 3.4807 5.5714 10.0000 23
We see that there are more NA values for new_bpipainintrfrscore since this score was calculated for the observation that met the condition of more than or equal to 4 of 7 pain interference responses available.
D.2.4.2 New field name(s)
Add the field name “pain_intrf” and “new_bpipainintrfrscore” to the data dictionary
# Create field names
<- data.frame(
pain_intrf_new_row1 field_name = "pain_intrf",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "Factor",
select_choices_or_calculations = "1,worst pain intensity at the surgical site and at least 4 of 7 pain interference items are available|2, worst pain intensity at the surgical site and less than or equal to 3 out 7 pain interference items are available| 3, no response to worst pain intensity at the surgical site but at least 4 out 7 pain interference items are available |4, no response to worst pain intensity at the surgical site and less than or equal to 3 out 7 pain interference items are available"
)
# new row for new_bpipainintrfrscore
<- data.frame(
pain_intrf_new_row2 field_name = "new_bpipainintrfrscore",
form_name = "bpisf_the_brief_pain_inventory_v23_short_form_bpi",
field_type = "numeric",
select_choices_or_calculations = "mean of interference items,if there is a response to at least 4 of 7 items"
)
# Add new rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pain_intrf_new_row1,
.after = match("bpipainintrfrscore", psy_soc_dict$field_name)
%>%
) add_row(
!!!pain_intrf_new_row2,
.after = match("bpipainintrfrscore", psy_soc_dict$field_name)
)
D.2.4.3 Save:
Save “bpi_pain_intrf” and updated data dictionary as .csv files in the folder named “reformatted_bpi”
write_csv(
bpi_pain_intrf,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_bpi_pain_intrf.csv"
)
)
write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.5 Knee Injury Osteoarthritis Outcome Score (KOOS-12)
KOOS-12 is a 12-item scale that contains three sub scales (Roos et al., 1998):
4 KOOS Pain items
4 KOOS Function (Activities of Daily Living and Sport/Recreation) items
4 KOOS Quality of Life (QOL) items to assesses knee pain none to extreme).
There must be a response to at least half of the items in each sub scale to calculate a sub scale score KOOS calculator. These scores are transformed to range from 0 to 100, where 0 represents extreme problems and 100 represents no problems.
D.2.5.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the KOOS-12 data, keeping completed forms will subset to the TKA cohort by default, we will call this koos
<- applyFilter(
koos "koos",
knee_injury_osteoarthritis_outcome_score_koos12_complete )
D.2.5.2 Missing Data:
Missing data pattern in koos.
gg_miss_upset(
koos,nsets = n_var_miss(koos),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
If at least half of the items are answered for a given subscale, a subscale score can be calculated as follows KOOS calculator:
\[ 100-(mean (subscale score) \times 100 / 4) \]
The mean of all three subscale scores is then used to construct an overall KOOS-12 Summary knee impact score. As with sub scale scores, KOOS-12 summary scores range from 0-100, where 0 represents extreme problems and 100 represents no problems. A Summary impact score is not calculated if any of the three scale scores are missing.
Using the above equation, we will create three subscale scores:
pain_score for 4 KOOS Pain items
func_score for 4 KOOS Function (Activities of Daily Living and Sport/Recreation) items
qol_score for 4 KOOS Quality of Life (QOL) items.
We will also create a summary impact score “pain_summary”
<- c(
pain "koospainfreqscl",
"koospainwalkflatscl",
"koospainstairsscl",
"koospainsitlyingscl"
)
<- c(
func "koosfuncdiffrisesitscl",
"koosfuncdiffstandscl",
"koosfuncdiffcarscl",
"koosfunctwistpivotscl"
)
<- c(
qol "koosqolkneeawarescl",
"koosqollifestylemodscl",
"koosqolconfidencescl",
"koosqolkneedifficultyscl"
)
<- c("new_pain_score", "new_func_score", "new_qol_score")
all_koos
<- koos %>%
koos mutate(pain_not_na = rowSums(!is.na(select(., all_of(pain))))) %>%
mutate(func_not_na = rowSums(!is.na(select(., all_of(func))))) %>%
mutate(qol_not_na = rowSums(!is.na(select(., all_of(qol))))) %>%
mutate(
new_pain_score = case_when(
>= 2 ~
pain_not_na 100 - ((rowMeans(select(., all_of(pain)), na.rm = TRUE) * 100) / 4)
)%>%
) mutate(
new_func_score = case_when(
>= 2 ~
func_not_na 100 - ((rowMeans(select(., all_of(func)), na.rm = TRUE) * 100) / 4)
)%>%
) mutate(
new_qol_score = case_when(
>= 2 ~
qol_not_na 100 - ((rowMeans(select(., all_of(qol)), na.rm = TRUE) * 100) / 4)
)%>%
) mutate(new_pain_summary = rowMeans(select(., all_of(all_koos))))
We will now check the frequencies of all the scores (new vs. original) after accounting for missing data.
<- koos %>%
new_koos select(c(
"new_qol_score",
"new_pain_score",
"new_func_score",
"new_pain_summary"
))<- koos %>%
old_koos select(c(
"koospainscoret",
"koosfunctionscoret",
"koosqolscoret",
"koossummaryscore"
))
Upset plot below shows missing scores in the original dataset
gg_miss_upset(
old_koos,nsets = n_var_miss(old_koos),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Upset plot below shows missing scores for observation that don’t meet the criteria for calculating a scale score i.e. If at least half of the items are answered
gg_miss_upset(
new_koos,nsets = n_var_miss(new_koos),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
D.2.5.3 New field name(s)
Add the field names “new_qol_score”,“new_pain_score”, “new_func_score” and “new_pain_summary” to the data dictionary
# Create New field name(s)s
<- data.frame(
qol_score_new_row field_name = "new_qol_score",
form_name = "knee_injury_osteoarthritis_outcome_score_koos12",
field_type = "numeric",
field_label = "0 represents extreme problems and 100 represents no problems",
field_note = "If at least half of the items are answered for quality of life subscale, a subscale score can be calculated as follows 100-(mean (quality of life subscale) X 100 / 4"
)
<- data.frame(
func_score_new_row field_name = "new_func_score",
form_name = "knee_injury_osteoarthritis_outcome_score_koos12",
field_type = "numeric",
field_label = "0 represents extreme problems and 100 represents no problems",
field_note = "If at least half of the items are answered for function subscale, a subscale score can be calculated as follows 100-(mean (function subscale) X 100 / 4"
)
<- data.frame(
pain_score_new_row field_name = "new_pain_score",
form_name = "knee_injury_osteoarthritis_outcome_score_koos12",
field_type = "numeric",
field_label = "0 represents extreme problems and 100 represents no problems",
field_note = "If at least half of the items are answered for pain subscale, a subscale score can be calculated as follows 100-(mean (pain subscale) X 100 / 4"
)
<- data.frame(
pain_summary_new_row field_name = "new_pain_summary",
form_name = "knee_injury_osteoarthritis_outcome_score_koos12",
field_type = "numeric",
field_label = "0 represents extreme problems and 100 represents no problems",
field_note = "If all three scale scores are available, The mean of all three subscale scores is then used to construct an overall KOOS-12 Summary knee impact score"
)
# Add new rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!qol_score_new_row,
.after = match("koosqolscoret", psy_soc_dict$field_name)
%>% # new qol score
) add_row(
!!!func_score_new_row,
.after = match("koosfunctionscoret", psy_soc_dict$field_name)
%>% # new func score
) add_row(
!!!pain_score_new_row,
.after = match("koospainscoret", psy_soc_dict$field_name)
# new pain score
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pain_summary_new_row,
.after = match("koossummaryscore", psy_soc_dict$field_name)
# new summary score )
D.2.5.4 Save:
Save “koos” and updated data dictionary as .csv files in the folder named “reformatted_koos”
write_csv(
koos,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_koos.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.6 Generalized Anxiety Disorder 7 Item (GAD7) Scale Score
The GAD-7 is a 7-item likert scale questionnaire that assesses anxiety levels over the last two weeks. The response to each item is rated on a scale of 0 (“not at all”) to 3 (“nearly every day”). A total score is calculated by adding the scores for all the 7 items and ranges from 0 to 21. A greater total score on the GAD-7 reflects higher anxiety levels (Spitzer et al., 2006). At the end of the questionnaire there is an additional question (“gad7difficulttowork”) to evaluate the subject’s perception about the impact of identified problems on their activities such as work, taking care of things at home, or interacting with others.
D.2.6.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the GAD7 data and keep completed forms, we will call this gad
<- applyFilter(
gad "gad",
generalized_anxiety_disorder_7_item_gad7_scale_sco_complete )
D.2.6.2 Missing Data:
Missing data pattern in gad. We will exclude “gad7difficulttowork” since the response to this field name is conditional on the responses to the rest of the GAD7 items.
gg_miss_upset(
%>%
gad select(-gad7difficulttowork),
nsets = n_var_miss(gad),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
In case of missing values, a total score can still be calculated if responses to 5 of 7 items are available (less than or equal to 29% missing data). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 7 items(Arrieta et al., 2017).
There are two observations with complete data missing, we will remove these observations. We will create a variable “gad_diff” and assign a value of 1 if there is a response to “gad7difficulttowork” and less than or equal to 2 GAD items are missing, 2 if there is a response to “gad7difficulttowork” and more than 2 GAD items are missing, 3 if there is no response to “gad7difficulttowork” and less than or equal to 2 GAD items are missing, 4 if there is no response to “gad7difficulttowork” more than 2 GAD items are missing.
<- c(
gad_vector "gad2feelnervscl",
"gad2notstopwryscl",
"gad7wrytoomchscl",
"gad7troubrelxscl",
"gad7rstlessscl",
"gad7easyannoyedscl",
"gad7feelafrdscl"
)
<- gad %>%
gad mutate(gad_not_na = rowSums(!is.na(select(., all_of(gad_vector))))) %>%
mutate(
gad_diff = case_when(
!is.na(gad7difficulttowork) & gad_not_na >= 5 ~ 1,
!is.na(gad7difficulttowork) & gad_not_na < 5 ~ 2,
is.na(gad7difficulttowork) & gad_not_na >= 5 ~ 3,
is.na(gad7difficulttowork) & gad_not_na < 5 ~ 4,
TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(gad_diff = as.factor(gad_diff)) %>%
filter(gad_not_na != 0)
We will now replace missing values with the rounded mean of remaining items if gad_diff = 1 or gad_diff = 3 i.e less than or equal to 2 GAD items are missing
<- c(
gad_vector "gad2feelnervscl",
"gad2notstopwryscl",
"gad7wrytoomchscl",
"gad7troubrelxscl",
"gad7rstlessscl",
"gad7easyannoyedscl",
"gad7feelafrdscl"
)
<- gad %>%
gad filter(gad_not_na != 0) %>%
mutate(
mean_gad = case_when(
== 1 | gad_diff == 3 ~
gad_diff round(rowMeans(select(., all_of(gad_vector)), na.rm = TRUE))
)%>%
) mutate(across(all_of(gad_vector), ~ if_else(is.na(.), mean_gad, .))) %>%
mutate(
imputed_gad_score = case_when(
== 1 | gad_diff == 3 ~
gad_diff rowSums(select(., all_of(gad_vector)), na.rm = TRUE)
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
gad select(-gad7difficulttowork, -mean_gad, -imputed_gad_score),
nsets = n_var_miss(gad),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of gad scores before (gad7totscore) and after imputation (imputed_gad_score)
<- gad %>%
gad_plot filter(between(gad_not_na, 5, 6)) %>%
select(guid, gad7totscore, imputed_gad_score) %>%
pivot_longer(
cols = c("gad7totscore", "imputed_gad_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(gad_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- gad %>%
gad select(-gad_not_na, -mean_gad)
D.2.6.3 New field name(s)
Add the field names “gad_diff” and “imputed_gad_score”to the data dictionary
# Create field names
<- data.frame(
gad_diff_new_row field_name = "gad_diff",
form_name = "generalized_anxiety_disorder_7_item_gad7_scale_sco",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to gad7difficulttowork and less than or equal to 2 GAD items are missing|2, if there is a response to gad7difficulttowork and more than 2 GAD items are missing|3,if there is no response to gad7difficulttowork and less than or equal to 2 GAD items are missing|4,if there is no response to gad7difficulttowork more 2 GAD items are missing"
)
<- data.frame(
gad_score_new_row field_name = "imputed_gad_score",
form_name = "generalized_anxiety_disorder_7_item_gad7_scale_sco",
field_type = "numeric",
field_label = " A total score ranges from 0 to 21. A greater total score on the GAD-7 reflects higher anxiety levels",
field_note = "The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 7 items"
)
# Adding new rows to the data dictionary
# adding gad_diff
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!gad_diff_new_row,
.after = match("gad7difficulttowork", psy_soc_dict$field_name)
)
# imputed gad score score
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!gad_score_new_row,
.after = match("gad7difficulttowork", psy_soc_dict$field_name)
)
D.2.6.4 Save:
Save “gad” and updated data dictionary as .csv files in the folder named “reformatted_gad”
write_csv(
gad,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_gad.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.7 Patient Health Questionnaire Depression Scale (PHQ) Scored
The PHQ is a 8-item likert scale questionnaire that assesses depression levels over the last two weeks. The response to each item is rated on a scale of 0 (“not at all”) to 3 (“nearly every day”). The total score is calculated by adding the scores for all the 8 items. A score greater than 10 on PHQ indicates major depressive disorder(Kroenke et al., 2009).
D.2.7.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the PHQ data and keep completed forms, we will call phq
<- applyFilter(
phq "phq",
patient_health_questionnaire_depression_scale_phq_complete )
D.2.7.2 Missing Data:
Missing data pattern in phq.
gg_miss_upset(
phq,nsets = n_var_miss(phq),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
In case of missing values, a total score can still be calculated if responses to 6 of 8 items are available (less than or equal to 29% missing data). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 8 items(Arrieta et al., 2017).
None of the observations have complete data missing. We will create a variable “phq_diff” and assign a value of 1 if there is a response to more than or equal to 6 of 8 PHQ items and 0 if there is a response to less than 6 of 8 PHQ items.
<- c(
phq_vector "phqlitintrstscore",
"phqdeprssnscore",
"phqsleepimpairscore",
"phqtirdlittleenrgyscore",
"phqabnrmldietscore",
"phqflngfailrscore",
"phqconcntrtnimprmntscore",
"phqmovmntspchimprmntscore"
)
<- phq %>%
phq mutate(phq_not_na = rowSums(!is.na(select(., all_of(phq_vector))))) %>%
mutate(
phq_diff = case_when(
>= 6 ~ 1,
phq_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(phq_diff = as.factor(phq_diff))
We will now replace missing values with the rounded mean of remaining items if phq_diff = 1 i.e if less than or equal to 2 PHQ items are missing and calculate the total score by taking a sum of all the items.
<- phq %>%
phq mutate(
mean_phq = case_when(
== 1 ~
phq_diff round(rowMeans(select(., all_of(phq_vector)), na.rm = TRUE))
)%>%
) mutate(across(all_of(phq_vector), ~ if_else(is.na(.), mean_phq, .))) %>%
mutate(
imputed_phq_score = case_when(
== 1 ~ rowSums(select(., all_of(phq_vector)), na.rm = TRUE)
phq_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
phq select(-mean_phq, -imputed_phq_score),
nsets = n_var_miss(phq),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of phq scores before (phqtotalscore) and after imputation (imputed_phq_score)
<- phq %>%
phq_plot filter(between(phq_not_na, 6, 8)) %>%
select(guid, phqtotalscore, imputed_phq_score) %>%
pivot_longer(
cols = c("phqtotalscore", "imputed_phq_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(phq_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- phq %>%
phq select(-phq_not_na, -mean_phq)
D.2.7.3 New field name(s)
Add the field names “phq_diff” and “imputed_phq_score”to the data dictionary
# Create field names
<- data.frame(
phq_diff_new_row field_name = "phq_diff",
form_name = "patient_health_questionnaire_depression_scale_phq",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 6 of 8 PHQ items|0,if there is a response to less than 6 of 8 PHQ items"
)
<- data.frame(
phq_score_new_row field_name = "imputed_phq_score",
form_name = "patient_health_questionnaire_depression_scale_phq",
field_type = "numeric",
field_label = "A greater than 10 total score on the PHQ indicates major depressive disorder ",
field_note = "The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 8 items"
)
# Add new rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!phq_diff_new_row,
.after = match("phqtotalscore", psy_soc_dict$field_name)
# new summary score
)
# imputed phq score
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!phq_score_new_row,
.after = match("phq_diff", psy_soc_dict$field_name)
# new summary score )
D.2.7.4 Save:
Save “phq” and updated data dictionary as .csv files in the folder named “reformatted_phq”
write_csv(
phq,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_phq.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.8 Fear-Avoidance Beliefs Questionnaire v0.3 (FABQ)
The FABQ is a 4-item questionnaire that assesses fear avoidance behavior. The response to each item ranges from 0 (completely disagree) to 6 (completely agree) (Waddell et al., 1993). A total score is calculated by adding the scores for all the items and ranges from 0-24. A higher score indicates a higher degree of fear-avoidance belief.
D.2.8.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the FABQ data and keep completed forms, we will call FABQ
<- applyFilter(
fabq "fabq",
fearavoidance_beliefs_questionnaire_v03_fabq_complete )
D.2.8.2 Missing Data:
Missing data pattern in fabq.
gg_miss_upset(
fabq,nsets = n_var_miss(fabq),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
In case of missing values, a total score can be calculated if responses to 3 of 4 items are available (less than or equal to 29% missing data). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 4 items(Arrieta et al., 2017).
We will create a variable “fabq_diff” and assign a value of 1 if there is a response to more than or equal to 3 of 4 FABQ items and 0 if there is a response to less than 3 of 4 FABQ items.
<- c(
fabq_vector "fabqphysclactvtywrsscl",
"fabqphysclactvtybckhrmscl",
"fabqphysactvtyshldntdoscl",
"fabqphysactvtycnntdoscl"
)
<- fabq %>%
fabq mutate(fabq_not_na = rowSums(!is.na(select(., all_of(fabq_vector))))) %>%
mutate(
fabq_diff = case_when(
>= 3 ~ 1,
fabq_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(fabq_diff = as.factor(fabq_diff))
We will now replace missing values with the rounded mean of remaining items if fabq_diff = 1 i.e if less than or equal to 1 FABQ item is missing and calculate the total score by taking a sum of all the items.
<- fabq %>%
fabq mutate(
raw_fabq_score = rowSums(select(., all_of(fabq_vector)), na.rm = TRUE)
%>%
) mutate(
mean_fabq = case_when(
== 1 ~
fabq_diff round(rowMeans(select(., all_of(fabq_vector)), na.rm = TRUE))
)%>%
) mutate(across(all_of(fabq_vector), ~ if_else(is.na(.), mean_fabq, .))) %>%
mutate(
imputed_fabq_score = case_when(
== 1 ~ rowSums(select(., all_of(fabq_vector)), na.rm = TRUE)
fabq_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
fabq,nsets = n_var_miss(fabq),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of fabq scores before (raw_fabq_score) and after imputation (imputed_fabq_score)
<- fabq %>%
fabq_plot filter(between(fabq_not_na, 3, 4)) %>%
select(guid, raw_fabq_score, imputed_fabq_score) %>%
pivot_longer(
cols = c("raw_fabq_score", "imputed_fabq_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(fabq_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- fabq %>%
fabq select(-fabq_not_na, -mean_fabq)
D.2.8.3 New field name(s)
Add the field names “fabq_diff” and “imputed_fabq_score” to the data dictionary
# Create field names
<- data.frame(
fabq_diff_new_row field_name = "fabq_diff",
form_name = "fearavoidance_beliefs_questionnaire_v03_fabq",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 3 of 4 FABQ items|0, if there is a response to less than 3 of 4 FABQ items"
)
<- data.frame(
fabq_score_new_row field_name = "imputed_fabq_score",
form_name = "fearavoidance_beliefs_questionnaire_v03_fabq",
field_type = "numeric",
field_label = "A higher score indicates a higher degree of fear-avoidance beliefs",
field_note = "The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all 4 items"
)
# Adding new rows to the data dictionary
# adding fabq_diff
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!fabq_diff_new_row,
.after = match("fabqphysclactvtybckhrmscl", psy_soc_dict$field_name)
)
# imputed fabq score
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!fabq_score_new_row,
.after = match("fabqphysclactvtybckhrmscl", psy_soc_dict$field_name)
)
D.2.8.4 Save:
Save “fabq” and updated data dictionary as .csv files in the folder named “reformatted_fabq”
write_csv(
fabq,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_fabq.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.9 Pain Catastrophizing Scale (PCS6)
The PCS-6 comprises six questions, each item is rated on a scale of 0 to 4, where 0 indicates “not at all” and 4 indicates “all the time.” The questionnaire also measures three sub-scales that evaluate rumination, magnification, and helplessness. A total score is calculated by adding the scores for all the 6 items and ranges from 0-24, the three sub scale scores can be calculated separately as well[Darnall et al. (2017)](McWilliams et al., 2015). A higher score indicates higher degree catastrophizing.
D.2.9.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the PCS-6 data and keep completed forms, we will call “pcs”
<- applyFilter("pcq", pain_catastrophizing_questionnaire_pcs6_complete) pcs
D.2.9.2 Missing Data:
Missing data pattern in pcs.
gg_miss_upset(
%>%
pcs select(-pcqtotalscoreval),
nsets = n_var_miss(pcs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
In case of missing values, a total score can still be calculated if responses to 5 of 6 items are available (0.29 x 6 = 1.74). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 6 items(refer to section 1.c).
Two of the the observations have complete data missing, we will remove these observations. We will create a variable “pcs_diff” and assign a value of 1 if there is a response to more than or equal to 5 of 6 PCS items and 0 if there is a response to less than 5 of 6 PCS items.
<- c(
pcs_vector "pcqpainawfulovrwhlmscl",
"pcqfeelcantwithstandscl",
"pcqafraidpainworsescl",
"pcqhurtscl",
"pcqpainstopscl",
"pcqseriousscl"
)
<- pcs %>%
pcs mutate(pcs_not_na = rowSums(!is.na(select(., all_of(pcs_vector))))) %>%
mutate(
pcs_diff = case_when(
>= 5 ~ 1,
pcs_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(pcs_diff = as.factor(pcs_diff)) %>%
filter(pcs_not_na != 0)
We will now replace missing values with the rounded mean of remaining items if pcs_diff = 1 i.e if 1 pcs item is missing, and calculate the total score by taking a sum of all the items.
<- pcs %>%
pcs mutate(
raw_pcs_score = rowSums(select(., all_of(pcs_vector)), na.rm = TRUE)
%>%
) mutate(
mean_pcs = case_when(
== 1 ~
pcs_diff round(rowMeans(select(., all_of(pcs_vector)), na.rm = TRUE))
)%>%
) mutate(across(all_of(pcs_vector), ~ if_else(is.na(.), mean_pcs, .))) %>%
mutate(
imputed_pcs_score = case_when(
== 1 ~ rowSums(select(., all_of(pcs_vector)), na.rm = TRUE)
pcs_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
pcs select(-pcqtotalscoreval, -raw_pcs_score, -imputed_pcs_score),
nsets = n_var_miss(pcs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of PCS scores before (raw_pcs_score) and after imputation (imputed_pcs_score)
<- pcs %>%
pcs_plot filter(between(pcs_not_na, 5, 6)) %>%
select(guid, raw_pcs_score, imputed_pcs_score) %>%
pivot_longer(
cols = c("raw_pcs_score", "imputed_pcs_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(pcs_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- pcs %>%
pcs select(-pcs_not_na, -mean_pcs, -raw_pcs_score)
D.2.9.3 New field name(s)
Add the field names “pcs_diff” and “imputed_pcs_score”to the data dictionary
# Create field names
<- data.frame(
pcs_diff_new_row field_name = "pcs_diff",
form_name = "pain_catastrophizing_questionnaire_pcs6",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 5 of 6 PCS items|0, if there is a response to less than 5 of 6 PCS items"
)
<- data.frame(
pcs_score_new_row field_name = "imputed_pcs_score",
form_name = "pain_catastrophizing_questionnaire_pcs6",
field_type = "numeric",
field_label = "A higher score indicates a higher degree of catastrophizing",
field_note = "The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the items"
)
# adding pcs_diff
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pcs_diff_new_row,
.after = match("pcqtotalscoreval", psy_soc_dict$field_name)
)
# imputed pcs score
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pcs_score_new_row,
.after = match("pcqtotalscoreval", psy_soc_dict$field_name)
)
D.2.9.4 Save:
Save “pcs” and updated data dictionary as .csv files in the folder named “reformatted_pcs”
write_csv(
pcs,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_pcs.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.10 Symptom Severity Index v1.0 (SSI)
The SSI measures the severity of fatigue, cognitive dysfunction, and unrefreshed sleep over the past week using a Likert scale from 0 (no problem) to 3 (severe). If the sum of the scores of fatigue, waking unrefreshed, and cognitive symptoms is more than 0, an additional question (ssi_chronicyn) related to the duration of symptoms is asked. The questionnaire also comprises three additional questions for the history of lower abdomen pain, depression, and headache for at least three months (yes/no). The symptom severity scale (SSS) score is calculated by summing up the scores of fatigue, waking unrefreshed, and cognitive symptoms, which range from 0 to 9, and the sum (0-3) of additional symptoms experienced in the past six months. The final total score can be between 0 and 12.(Wolfe et al., 2016).
D.2.10.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the SSI data and keep completed forms, we will call “ssi”
<- applyFilter("ssi", symptom_severity_index_v10_ssi_complete) ssi
D.2.10.2 Missing Data:
Missing data pattern in SSI, we will exclude “ssi_chronicyn” since the response to this field name is conditional on the sum of fatigue, waking unrefreshed, and cognitive symptom scores being more than 0.
gg_miss_upset(
%>%
ssi select(-ssi_chronicyn),
nsets = n_var_miss(ssi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
Missing values for fatigue, cognitive dysfunction, or unrefreshed sleep cannot be imputed using row wise means since 28% X 3 = 0.87, resulting in the floor value for non integer values being 0. Additionally we will not impute missing values for history of lower abdomen pain, depression, and headache for at least three months (yes/no)(refer section 1.c).
Scoring:
We will calculate:
A summed score for fatigue, cognitive dysfunction, or unrefreshed sleep if responses to all three items are available, ranging from 0-9, we will name this “ssi_symp_score”
A summed score for history of lower abdomen pain, depression, and headache for at least three months (yes/no), ranging from 0-3, we will name this “ssi_hist_score”
Finally we will calculate a total summed score ranging from 0-12 if responses to all items are available, we will name this “ssi_total_score”
<- c("ssi_fatigue", "ssi_cognitive", "ssi_tired")
ssi_symptoms <- c("ssi_abdpainyn", "ssi_depressyn", "ssi_headacheyn")
ssi_history <- c("ssi_symp_score", "ssi_hist_score")
ssi_all
<- ssi %>%
ssi mutate(
ssi_symp_score = rowSums(select(., all_of(ssi_symptoms)), na.rm = FALSE)
%>%
) mutate(
ssi_hist_score = rowSums(select(., all_of(ssi_history)), na.rm = FALSE)
%>%
) mutate(ssi_total_score = rowSums(select(., all_of(ssi_all)), na.rm = FALSE))
We will now create a field name “ssi_diff”, and assign a value of 0 if both ssi_symp_score and ssi_hist_score are missing, 1 if only ssi_symp_score is available, 2 if only ssi_hist_score is available and 3 if both scores and invariably the total score is available. If the sum of the scores of fatigue, waking unrefreshed, and cognitive symptoms is more than 0, an additional question (ssi_chronicyn) related to the duration of symptoms is asked. We will create a field name “ssi_missing_chronic” and assign a value of 0 if “ssi_symp_score” > 0 and “ssi_chronicyn” is missing, of 1 if “ssi_symp_score” > 0 and “ssi_chronicyn” is not missing, 2 if “ssi_sym_score” = 0 or “ssi_sym_score” is missing and “ssi_chronicyn” is missing.
<- ssi %>%
ssi mutate(
ssi_diff = case_when(
is.na(ssi_symp_score) & is.na(ssi_hist_score) ~ 0,
!is.na(ssi_symp_score) & is.na(ssi_hist_score) ~ 1,
is.na(ssi_symp_score) & !is.na(ssi_hist_score) ~ 2,
TRUE ~ 3
)%>%
) mutate(
ssi_missing_chronic = case_when(
> 0 & !is.na(ssi_chronicyn) ~ 1,
ssi_symp_score > 0 & is.na(ssi_chronicyn) ~ 0,
ssi_symp_score TRUE ~ 2
)%>%
) mutate(ssi_diff = as.factor(ssi_diff)) %>%
mutate(ssi_missing_chronic = as.factor(ssi_missing_chronic))
D.2.10.3 New field name(s)
Add the field names “ssi_symp_score”, “ssi_hist_score”, “ssi_total_score”, “ssi_diff”, and “ssi_missing_chronic” to the data dictionary.
# Create field names
<- data.frame(
ssi_symp_score_new_row field_name = "ssi_symp_score",
form_name = "symptom_severity_index_v10_ssi",
field_type = "numeric",
field_label = "Total score ranges from 0 to 9",
field_note = "Summed score for fatigue, cognitive dysfunction, or unrefreshed sleep if responses to all three items are available"
)
<- data.frame(
ssi_hist_score_new_row field_name = "ssi_hist_score",
form_name = "symptom_severity_index_v10_ssi",
field_type = "numeric",
field_label = "Total score ranges from 0 to 3",
field_note = "Summed score for history of lower abdomen pain, depression, and headache for at least three months (yes/no)"
)
<- data.frame(
ssi_total_score_new_row field_name = "ssi_total_score",
form_name = "symptom_severity_index_v10_ssi",
field_type = "numeric",
field_label = "Total score ranges from 0 to 12",
field_note = "Summed score of all the items if responses to all items are available"
)
<- data.frame(
ssi_diff_new_row field_name = "ssi_diff",
form_name = "symptom_severity_index_v10_ssi",
field_type = "factor",
select_choices_or_calculations = "0, if both ssi_symp_score and ssi_hist_score are missing, 1 if only ssi_symp_score is available|2, if only ssi_hist_score is available | 3, if both scores and invariably the total score is available"
)
<- data.frame(
ssi_missing_chronic_new_row field_name = "ssi_missing_chronic",
form_name = "symptom_severity_index_v10_ssi",
field_type = "factor",
select_choices_or_calculations = "0 if ssi_symp_score > 0 and ssi_chronicyn is missing| 1 if ssi_symp_score > 0 and ssi_chronicyn is not missing,2 if ssi_sym_score = 0 or is missing and ssi_chronicyn is missing"
)
# adding new rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!ssi_symp_score_new_row,
.after = match("ssi_headacheyn", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!ssi_hist_score_new_row,
.after = match("ssi_headacheyn", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!ssi_total_score_new_row,
.after = match("ssi_headacheyn", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!ssi_diff_new_row,
.after = match("ssi_headacheyn", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!ssi_missing_chronic_new_row,
.after = match("ssi_headacheyn", psy_soc_dict$field_name)
)
D.2.10.4 Save:
Save “ssi” and updated data dictionary as .csv files in the folder named “reformatted_ssi”
write_csv(
ssi,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_ssi.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.11 Pain Detect Questionnaire (PD-Q)
The PD-Q consists of seven question to evaluate neuropathic pain, followed by two additional questions that focus on pain patterns to differentiate between neuropathic and nociceptive pain.
Each item in the initial seven questions is rated on a scale of 0 to 5, where 0 indicates “never” and 5 indicates “very strongly.” The total score is calculated by summing up the scores of the initial seven questions, which can range from 0 to 35, plus the sum of additional two questions. The final total score can be between 0 and 38(Freynhagen et al., 2006).
D.2.11.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the PD-Q data and keep completed forms, we will call “pdq”
<- applyFilter("pd", pain_detect_questionnaire_pdq_complete) pdq
D.2.11.2 Missing Data:
Missing data pattern in pdq: Subjects with a total score of 0 on the initial 7 questions may not have responses to the questions related to pain patterns. We will account for this when visualizing missing data pattern.
There are rows where the sum of initial seven questions is 0 and there are responses to “pdqpaincourse” and “pdradiateregions”. Create a dataset “pdq_zero” and fill in missing values with “99” for the questions related to pain pattern (“pdqpaincourse” and “pdradiateregions”) if the sum of initial seven questions is 0 and if responses to “pdqpaincourse” and “pdradiateregions” are not available
<- c(
pdq_seven "pdburnsens",
"pdtinglsens",
"pdlttouch",
"pdsudpanatt",
"pdbathwtr",
"pdnumb",
"pdslightpress"
)<- pdq %>%
pdq_zero mutate(
pdq_seven_score = rowSums(select(., all_of(pdq_seven)), na.rm = TRUE)
%>%
) mutate(
pdqpaincourse = case_when(
== 0 & is.na(pdqpaincourse) ~ 99,
pdq_seven_score TRUE ~ pdqpaincourse,
)%>%
) mutate(
pdradiateregions = case_when(
== 0 & is.na(pdradiateregions) ~ 99,
pdq_seven_score TRUE ~ pdradiateregions,
) )
Visualize missing data after accounting for missing responses to pdqpaincourse” and “pdradiateregions” if the sum of total score of the initial 7 items is 0.
gg_miss_upset(
pdq_zero,nsets = n_var_miss(pdq_zero),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
Missing values for the initial neuropathic pain related seven question can be imputed if the responses to 5 of 7 items are available (less than or equal to 29% missing data). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the seven items. We will not impute missing values “pdqpaincourse” and “pdradiateregions” since these two follow up questions focus on pain patterns to differentiate between neuropathic and nociceptive pain (refer section 1.c).
We will create a variable “pdq_diff” and assign a value of 1 if there is a response to more than or equal to 5 of 7 Pdq items and 0 if there is a response to less than 5 of 7 Pdq items.
<- c(
pdq_seven "pdburnsens",
"pdtinglsens",
"pdlttouch",
"pdsudpanatt",
"pdbathwtr",
"pdnumb",
"pdslightpress"
)<- pdq %>%
pdq mutate(pdq_not_na = rowSums(!is.na(.[, pdq_seven]))) %>%
mutate(
pdq_diff = case_when(
>= 5 ~ 1,
pdq_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(pdq_diff = as.factor(pdq_diff))
We will now replace missing values with the rounded mean of remaining items if pdq_diff = 1 and calculate a score by taking a sum of the 7 neuropathic pain related items, we will call this “imputed_pdq_score.”
<- pdq %>%
pdq mutate(
raw_pdq_score = rowSums(select(., all_of(pdq_seven)), na.rm = TRUE)
%>%
) mutate(
mean_pdq = case_when(
== 1 ~
pdq_diff round(rowMeans(select(., all_of(pdq_seven)), na.rm = TRUE))
)%>%
) mutate(across(all_of(pdq_seven), ~ if_else(is.na(.), mean_pdq, .))) %>%
mutate(
imputed_pdq_score = case_when(
== 1 ~ rowSums(select(., all_of(pdq_seven)), na.rm = TRUE)
pdq_diff
) )
Check missingness pattern after imputation
<- pdq_zero %>%
pdq_zero_imputed mutate(pdq_not_na = rowSums(!is.na(.[, pdq_seven]))) %>%
mutate(
pdq_diff = case_when(
>= 5 ~ 1,
pdq_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(pdq_diff = as.factor(pdq_diff)) %>%
mutate(
mean_pdq = case_when(
== 1 ~
pdq_diff round(rowMeans(select(., all_of(pdq_seven)), na.rm = TRUE))
)%>%
) mutate(across(all_of(pdq_seven), ~ if_else(is.na(.), mean_pdq, .))) %>%
select(-pdq_seven_score, -pdq_diff, -pdq_not_na, -mean_pdq)
gg_miss_upset(
pdq_zero_imputed,nsets = n_var_miss(pdq_zero_imputed),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of PDQ scores before (raw_pdq_score) and after imputation (imputed_pdq_score)
<- pdq %>%
pdq_plot filter(between(pdq_not_na, 5, 7)) %>%
select(guid, raw_pdq_score, imputed_pdq_score) %>%
pivot_longer(
cols = c("raw_pdq_score", "imputed_pdq_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(pdq_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- pdq %>%
pdq select(-pdq_not_na, -mean_pdq, -raw_pdq_score)
D.2.11.3 Scoring:
The scoring for pain behavior pattern (pdqpaincourse) and the pain radiation (pdradiateregions) is as follows(Freynhagen et al., 2006)[PainDETECT.pdf](https://www.oregon.gov/oha/HPA/dsi-pmc/PainCareToolbox/PainDETECT.pdf):
First we will recode values for “pdqpaincourse” and “pdradiateregions” according to the above table
We will then combine “pdqpaincourse”, “pdradiateregions” scores and the “imputed_pdq_score” to get the total score.
Recode values for “pdqpaincourse” and “pdradiateregions”, we will call these “pdqpaincourse_recoded” and “pdradiateregions_recoded”
<- pdq %>%
pdq mutate(
pdqpaincourse_recoded = case_when(
== 1 ~ 0,
pdqpaincourse == 2 ~ -1,
pdqpaincourse == 3 ~ 1,
pdqpaincourse == 4 ~ 1,
pdqpaincourse TRUE ~ pdqpaincourse
)%>%
) mutate(
pdradiateregions_recoded = case_when(
== 0 ~ 0,
pdradiateregions == 1 ~ 2,
pdradiateregions TRUE ~ pdradiateregions
) )
Combine “pdqpaincourse_recoded”, “pdradiateregions_recoded” scores and the “imputed_pdq_score” to get the total score if all three scores are available, we will call this “pd_neuro_total_score”
<- c(
pd_neuro "pdqpaincourse_recoded",
"pdradiateregions_recoded",
"imputed_pdq_score"
)<- pdq %>%
pdq mutate(
pd_neuro_total_score = rowSums(select(., all_of(pd_neuro)), na.rm = FALSE)
%>%
) mutate(pdqpaincourse_recoded = as.factor(pdqpaincourse_recoded)) %>%
mutate(pdradiateregions_recoded = as.factor(pdradiateregions_recoded))
D.2.11.4 New field name(s)
Add the field names “pdq_diff”,“pdqpaincourse_recoded”,“pdradiateregions_recoded”,“imputed_pdq_score”, and “pd_neuro_total_score” to the data dictionary
# Create field names
<- data.frame(
pdq_diff_new_row field_name = "pdq_diff",
form_name = "pain_detect_questionnaire_pdq",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 5 of 7 PDQ neuropathic pain related items| 0, if there is a response to less than 5 of 7 items"
)
<- data.frame(
pdq_score_new_row field_name = "imputed_pdq_score",
form_name = "pain_detect_questionnaire_pdq",
field_type = "numeric",
field_label = "Sum of the 7 neuropathic pain related items by replacing missing values with the rounded mean of remaining items if pdq_diff = 1"
)
<- data.frame(
pdq_course_new_row field_name = "pdqpaincourse_recoded",
form_name = "pain_detect_questionnaire_pdq",
field_type = "factor",
select_choices_or_calculations = "0,Persistent pain with slight fluctuations|-1,Persistent pain with pain attacks|1,Pain attacks without pain between them|1,Pain attacks with pain between them"
)
<- data.frame(
pdq_radiate_new_row field_name = "pdradiateregions_recoded",
form_name = "pain_detect_questionnaire_pdq",
field_type = "factor",
select_choices_or_calculations = "0,No Radiating pains|2,Radiating pains"
)
<- data.frame(
pdq_neuro_score_new_row field_name = "pd_neuro_total_score",
form_name = "pain_detect_questionnaire_pdq",
field_type = "numeric",
field_label = "Sum of pdqpaincourse_recoded,pdradiateregions_recoded and the imputed_pdq_score if all three scores are available"
)
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pdq_diff_new_row,
.after = match("pdradiateregions", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pdq_score_new_row,
.after = match("pdradiateregions", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pdq_course_new_row,
.after = match("pdradiateregions", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pdq_radiate_new_row,
.after = match("pdradiateregions", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!pdq_neuro_score_new_row,
.after = match("pdradiateregions", psy_soc_dict$field_name)
)
D.2.11.5 Save:
Save “pdq” and updated data dictionary as .csv files in the folder named “reformatted_pdq”
write_csv(
pdq,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_pdq.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.12 Patient-Reported Outcomes Measurement Information System (PROMIS)
PROMIS instruments are based on item response theory (IRT), where the responses to a set of items measure a latent trait(Cella et al., 2010). PROMIS instruments can be scored accurately using the HealthMeasures Scoring Service found here. The A2PCS uses the following PROMIS instruments:
Form Name |
---|
PROMIS SF v1.2 - Physical Function 8b |
PROMIS SF v1.0 - Sleep Disturbance 6a (Sleep I) |
PROMIS SF v1.0 - Fatigue 7a |
PROMIS SF v2.0 - Emotional Support 6a |
PROMIS SF v2.0 - Instrumental Support 6a |
D.2.13 Pain-Sleep Duration (Sleep II)
The Sleep II form measures the average number of hours and minutes of sleep each night during the past month (Buysse et al., 1989).
D.2.13.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the Sleep Duration data and keep completed forms, we will call “sleep_dur”
<- applyFilter("sleep", painsleep_duration_sleep_ii_complete) %>%
sleep_dur retype()
D.2.13.2 Outliers:
Missing values for hours and minutes of sleep cannot be imputed. We will check the distribution of hours and minutes of sleep and check for outliers
par(mfrow = c(1, 1))
boxplot(sleep_dur$sleepnighthourmindurhrs, ylab = "Sleep Hours")
boxplot(sleep_dur$sleepnighthourmindurmins, ylab = "Sleep Minutes")
We will create a variable “sleep_outlier_hours” and assign a value of 0 if “sleepnighthourmindurhrs” is missing and “sleepnighthourmindurmins” is available, 1 if “sleepnighthourmindurhrs” is less than or equal to 23 (within range) and 2 if “sleepnighthourmindurhrs” is more than 23, and 4 if both “sleepnighthourmindurhrs” and “sleepnighthourmindurmins” are missing. We will not assign a value for missing “sleepnighthourmindurmins” for this variable since most people report sleep in hours, unless both “sleepnighthourmindurhrs” and “sleepnighthourmindurmins” are missing.
We will also create a variable “sleep_outlier_mins” and assign a value of 1 if “sleepnighthourmindurmins” is less than or equal to 59 (within range) and 2 if “sleepnighthourmindurmins” is more than 59 (out of range), and 3 if “sleepnighthourmindurmins” is missing.
<- sleep_dur %>%
sleep_dur mutate(
sleep_outlier_hours = case_when(
is.na(sleepnighthourmindurhrs) & !is.na(sleepnighthourmindurmins) ~ 0,
<= 23 ~ 1,
sleepnighthourmindurhrs > 23 ~ 2,
sleepnighthourmindurhrs is.na(sleepnighthourmindurhrs) & is.na(sleepnighthourmindurmins) ~ 4
)%>%
) mutate(sleep_outlier_hours = as.factor(sleep_outlier_hours))
<- sleep_dur %>%
sleep_dur mutate(
sleep_outlier_mins = case_when(
<= 59 ~ 1,
sleepnighthourmindurmins > 59 ~ 2,
sleepnighthourmindurmins TRUE ~ 3
)%>%
) mutate(sleep_outlier_mins = as.factor(sleep_outlier_mins))
D.2.13.3 New field name(s)
Add the field names “sleep_outlier_hours” and “sleep_outlier_mins” to the data dictionary
# Create field names
<- data.frame(
sleep_hour_new_row field_name = "sleep_outlier_hours",
form_name = "painsleep_duration_sleep_ii",
field_type = "factor",
select_choices_or_calculations = "0, if sleepnighthourmindurhrs is missing and sleepnighthourmindurmins is available|1 if sleepnighthourmindurhrs is less than or equal to 23 (within range)|2,if sleepnighthourmindurhrs is more than 23 | 4, if both sleepnighthourmindurhrs and sleepnighthourmindurmins are missing"
)
<- data.frame(
sleep_mins_new_row field_name = "sleep_outlier_mins",
form_name = "painsleep_duration_sleep_ii",
field_type = "factor",
select_choices_or_calculations = "1 if sleepnighthourmindurmins is less than or equal to 59 (within range)|2, if sleepnighthourmindurmins is more than 59 (out of range)| 3 if sleepnighthourmindurmins is missing"
)
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!sleep_hour_new_row,
.after = match("sleepnighthourmindur", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!sleep_mins_new_row,
.after = match("sleepnighthourmindur", psy_soc_dict$field_name)
)
D.2.13.4 Save:
Save “sleep_dur” and updated data dictionary as .csv files in the folder named “reformatted_sleep_dur”
write_csv(
sleep_dur,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_sleep_dur.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.14 Pain Resilience Scale (PRS)
PRS is a 14-item scale that measures resilience during intense or prolonged pain(Slepian et al., 2016).
Each item is rated on a scale of 0 to 4, where 0 indicates “not at all” and 4 indicates “all the time.” The total score is calculated by summing up all 14 items, which can range from 0 to 56. Higher scores indicate greater pain related resilience. PRS also contains two sub scales(Slepian et al., 2016)
Behavioral Perseverance = sum of items 1-5
Cognitive/Affective positivity = sum of items 6-14
D.2.14.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the PRS forms and keep completed forms, we will call this “prs”
<- applyFilter("prs", pain_resilience_scale_prs_complete) prs
D.2.14.2 Behavioral Perseverance (items 1-5): Handling missing data and scoring
Missing data pattern in Behavioral Perseverance.
gg_miss_upset(
%>%
prs select(
prsbackoutscl,
prsworkgoalsscl,
prspushthroughscl,
prscontworkscl,
prsstayactivescl
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Missing values can be imputed if the responses to 4 of 5 items are available (less than or equal to 29% missing data).
The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the seven items
We will create a variable “prs_bhv_diff” and assign a value of 1 if there is a response to more than or equal to 4 of 5 behavioral perseverance items and 0 if there is a response to less than 4 of 5 items.
<- c(
prs_bhv_five "prsbackoutscl",
"prsworkgoalsscl",
"prspushthroughscl",
"prscontworkscl",
"prsstayactivescl"
)
<- prs %>%
prs mutate(prs_bhv_not_na = rowSums(!is.na(select(., all_of(prs_bhv_five))))) %>%
mutate(
prs_bhv_diff = case_when(
>= 4 ~ 1,
prs_bhv_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(prs_bhv_diff = as.factor(prs_bhv_diff))
We will now replace missing values with the rounded mean of remaining items if prs_bhv_diff = 1 and calculate a behavioral perseverance score by taking a sum of 5 associated item, we will call this “imputed_bhv_score.”
<- prs %>%
prs mutate(
raw_bhv_score = rowSums(select(., all_of(prs_bhv_five)), na.rm = TRUE)
%>%
) mutate(
mean_bhv_prs = case_when(
== 1 ~
prs_bhv_diff round(rowMeans(select(., all_of(prs_bhv_five)), na.rm = TRUE))
)%>%
) mutate(across(all_of(prs_bhv_five), ~ if_else(is.na(.), mean_bhv_prs, .))) %>%
mutate(
imputed_bhv_score = case_when(
== 1 ~ rowSums(select(., all_of(prs_bhv_five)), na.rm = TRUE)
prs_bhv_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
prs select(
prsbackoutscl,
prsworkgoalsscl,
prspushthroughscl,
prscontworkscl,
prsstayactivescl
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of behavioral perseverance scores before (raw_bhv_score) and after imputation (imputed_bhv_score)
<- prs %>%
prs_bhv_plot filter(between(prs_bhv_not_na, 4, 5)) %>%
select(guid, raw_bhv_score, imputed_bhv_score) %>%
pivot_longer(
cols = c("raw_bhv_score", "imputed_bhv_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(prs_bhv_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- prs %>%
prs select(-prs_bhv_not_na, -mean_bhv_prs, -raw_bhv_score)
D.2.14.3 Cognitive/Affective positivity: Handling missing data and scoring
Missing Data Pattern
gg_miss_upset(
%>%
prs select(
prsfocuspositivescl,
prsposattitudescl,
prsnotaffecthappyscl,
prsfindjoyscl,
prshopefulscl,
prsnotgetdownscl,
prsnotupsetscl,
prsavoidnegativescl,
prsstayrelaxscl
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Missing values can be imputed if the responses to 7 of 9 items are available (less than or equal to 29% missing data).
The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the seven items
We will create a variable “prs_cog_diff” and assign a value of 1 if there is a response to more than or equal to 7 of 9 Cognitive/Affective positivity items and 0 if there is a response to less than 7 of 9 items.
<- c(
prs_cog_nine "prsfocuspositivescl",
"prsposattitudescl",
"prsnotaffecthappyscl",
"prsfindjoyscl",
"prshopefulscl",
"prsnotgetdownscl",
"prsnotupsetscl",
"prsavoidnegativescl",
"prsstayrelaxscl"
)
<- prs %>%
prs mutate(prs_cog_not_na = rowSums(!is.na(.[, prs_cog_nine]))) %>%
mutate(
prs_cog_diff = case_when(
>= 7 ~ 1,
prs_cog_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(prs_cog_diff = as.factor(prs_cog_diff))
We will now replace missing values with the rounded mean of remaining items if prs_cog_diff = 1 and calculate a Cognitive/Affective positivity score by taking a sum of 9 associated item, we will call this “imputed_prs_cog_score.”
<- prs %>%
prs mutate(
raw_prs_cog_score = rowSums(select(., all_of(prs_cog_nine)), na.rm = TRUE)
%>%
) mutate(
mean_cog_prs = case_when(
== 1 ~
prs_cog_diff round(rowMeans(select(., all_of(prs_cog_nine)), na.rm = TRUE))
)%>%
) mutate(across(all_of(prs_cog_nine), ~ if_else(is.na(.), mean_cog_prs, .))) %>%
mutate(
imputed_prs_cog_score = case_when(
== 1 ~ rowSums(select(., all_of(prs_cog_nine)), na.rm = TRUE)
prs_cog_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
prs select(
prsfocuspositivescl,
prsposattitudescl,
prsnotaffecthappyscl,
prsfindjoyscl,
prshopefulscl,
prsnotgetdownscl,
prsnotupsetscl,
prsavoidnegativescl,
prsstayrelaxscl
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Check distributions Cognitive/Affective positivity scores before (raw_prs_cog_score) and after imputation (imputed_prs_cog_score)
<- prs %>%
prs_cog_plot filter(between(prs_cog_not_na, 7, 9)) %>%
select(guid, raw_prs_cog_score, imputed_prs_cog_score) %>%
pivot_longer(
cols = c("raw_prs_cog_score", "imputed_prs_cog_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(prs_cog_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- prs %>%
prs select(-prs_cog_not_na, -mean_cog_prs, -raw_prs_cog_score)
D.2.14.4 Total PRS Score: Handling missing data and scoring
Missing data pattern in prs.
gg_miss_upset(
%>%
prs select(
prsbackoutscl,
prsworkgoalsscl,
prspushthroughscl,
prscontworkscl,
prsstayactivescl,
prsfocuspositivescl,
prsposattitudescl,
prsnotaffecthappyscl,
prsfindjoyscl,
prshopefulscl,
prsnotgetdownscl,
prsnotupsetscl,
prsavoidnegativescl,
prsstayrelaxscl
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data for total PRS score:
Missing values can be imputed if the responses to 10 of 14 items are available (less than or equal to 29% missing data).
Since the correlation between Behavioral Perseverance and Cognitive/Affective positivity is 0.76(Slepian et al., 2016) average score of the completed items can be used to impute missing values.
The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the seven items
We will create a variable “prs_diff” and assign a value of 1 if there is a response to more than or equal to 10 of 14 PRS items and 0 if there is a response to less than 10 of 14 PRS items.
<- c(
prs_fourteen "prsbackoutscl",
"prsworkgoalsscl",
"prspushthroughscl",
"prscontworkscl",
"prsstayactivescl",
"prsfocuspositivescl",
"prsposattitudescl",
"prsnotaffecthappyscl",
"prsfindjoyscl",
"prshopefulscl",
"prsnotgetdownscl",
"prsnotupsetscl",
"prsavoidnegativescl",
"prsstayrelaxscl"
)
<- prs %>%
prs mutate(prs_not_na = rowSums(!is.na(.[, prs_fourteen]))) %>%
mutate(
prs_diff = case_when(
>= 10 ~ 1,
prs_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(prs_diff = as.factor(prs_diff))
We will now replace missing values with the rounded mean of remaining items if prs_diff = 1 and calculate a total score by taking a sum of all fourteen items, we will call this “imputed_prs_score.”
<- prs %>%
prs mutate(
raw_prs_score = rowSums(select(., all_of(prs_fourteen)), na.rm = TRUE)
%>%
) mutate(
mean_prs = case_when(
== 1 ~
prs_diff round(rowMeans(select(., all_of(prs_fourteen)), na.rm = TRUE))
)%>%
) mutate(across(all_of(prs_fourteen), ~ if_else(is.na(.), mean_prs, .))) %>%
mutate(
imputed_prs_score = case_when(
== 1 ~ rowSums(select(., all_of(prs_fourteen)), na.rm = TRUE)
prs_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
prs select(
-ends_with("score"),
-mean_prs,
prs_not_na,-prs_diff,
-pain_resilience_scale_prs_complete
),nsets = n_var_miss(prs),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of total PRS scores before (raw_prs_score) and after imputation (imputed_prs_score)
<- prs %>%
prs_plot filter(between(prs_not_na, 10, 14)) %>%
select(guid, raw_prs_score, imputed_prs_score) %>%
pivot_longer(
cols = c("raw_prs_score", "imputed_prs_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(prs_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- prs %>%
prs select(-prs_not_na, -mean_prs, -raw_prs_score)
D.2.14.5 New field name(s)
Add the following field names to the data dictionary:
“prs_diff”,“prs_cog_diff”,“prs_bhv_diff”,“imputed_bhv_score”,“imputed_prs_cog_score”,“imputed_prs_score”
# Create field names
<- data.frame(
prs_diff_new_row field_name = "prs_diff",
form_name = "pain_resilience_scale_prs",
field_type = "factor",
select_choices_or_calculations = "1,if there is a response to more than or equal to 10 of 14 PRS items | 0, if there is a response to less than 10 of 14 items"
)
<- data.frame(
prs_cog_diff_new_row field_name = "prs_cog_diff",
form_name = "pain_resilience_scale_prs",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 7 of 9 Cognitive/Affective positivity related items | 0, if there is a response to less than 7 of 9 items"
)
<- data.frame(
prs_bhv_diff_new_row field_name = "prs_bhv_diff",
form_name = "pain_resilience_scale_prs",
field_type = "factor",
select_choices_or_calculations = "1,If there is a response to more than or equal to 4 of 5 behavioral perseverance related items| 0, if there is a response to less than 4 of 5 items"
)
<- data.frame(
prs_score_new_row field_name = "imputed_prs_score",
form_name = "pain_resilience_scale_prs",
field_type = "numeric",
field_label = "sum of all fourteen PRS items. Replacing missing values with the rounded mean of remaining items if prs_diff = 1"
)
<- data.frame(
prs_cog_score_new_row field_name = "imputed_prs_cog_score",
form_name = "pain_resilience_scale_prs",
field_type = "numeric",
field_label = "sum of all nine Cognitive/Affective positivity items. Replacing missing values with the rounded mean of remaining items if prs_cog_diff = 1"
)
<- data.frame(
prs_bhv_score_new_row field_name = "imputed_bhv_score",
form_name = "pain_resilience_scale_prs",
field_type = "numeric",
field_label = "sum of five behavioral perseverance items. Replacing missing values with the rounded mean of remaining items if prs_bhv_diff = 1"
)
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_diff_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_cog_diff_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_bhv_diff_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_score_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_cog_score_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!prs_bhv_score_new_row,
.after = match("prscognitivescore", psy_soc_dict$field_name)
)
D.2.14.6 Save:
Save “prs” and updated data dictionary as .csv files in the folder named “reformatted_prs”
write_csv(
prs,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_prs.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.15 Multidimensional Inventory of Subjective Cognitive Impairment v1.0 (MISCI)
Multidimensional Inventory of Subjective Cognitive Impairment (MISCI) is used to evaluate cognitive function in Fibromyalgia patients. The MISCI comprises ten items based on a Likert scale ranging from 1-5. The initial six items evaluate perceived cognitive abilities and are positively phrased, while the remaining four items assess cognitive difficulties and are negatively phrased. Therefore, items 7-10 should be reverse-coded before being added to the responses of the other items to calculate the total score. The total score can range from 10-50, where higher scores indicate better perceived cognitive functioning (Kratz et al., 2015).
D.2.15.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the MISCI form and keep completed forms, we will call “misci”
<- applyFilter(
misci "misci",
multidimensional_inventory_of_subjective_cognitive_complete )
We will reverse code the last four items which assess cognitive difficulties. For example, cognitive difficulty related question such as “trouble planning out steps of a task” is rated on a scale of 1 to 5 where 1 is “never” and 5 is “very often”. We will reverse code this to indicate 5 as “never” and 1 as “very often”. The structure will look as follows:
Original | Reverse coded |
---|---|
1 | 5 |
2 | 4 |
3 | 3 |
4 | 2 |
5 | 1 |
Reverse code last 4 items: We can accomplish this by subtracting the original values from 6.
<- misci %>%
misci mutate(miscishiftactivscl_reverse = 6 - miscishiftactivscl) %>%
mutate(misciplanningscl_reverse = 6 - misciplanningscl) %>%
mutate(misciexpressscl_reverse = 6 - misciexpressscl) %>%
mutate(miscirightwordsscl_reverse = 6 - miscirightwordsscl)
D.2.15.2 Missing Data
Missing data pattern in “misci”.
<- c(
misci_ten "miscithinkclrscl",
"miscimindsharpscl",
"miscirememberscl",
"miscilearnscl",
"misciconcentratescl",
"misciattentionscl",
"miscishiftactivscl_reverse",
"misciplanningscl_reverse",
"misciexpressscl_reverse",
"miscirightwordsscl_reverse"
)
gg_miss_upset(
%>%
misci select(all_of(misci_ten)),
nsets = n_var_miss(misci),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Missing values can be imputed if the responses to 8 of 10 items are available (less than or equal to 29% missing data).
We will create a variable “misci_diff” and assign a value of 1 if there is a response to more than or equal to 8 of 10 items and 0 if there is a response to less than 8 of 10 items. The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the ten items.
<- misci %>%
misci mutate(misci_not_na = rowSums(!is.na(.[, misci_ten]))) %>%
mutate(
misci_diff = case_when(
>= 8 ~ 1,
misci_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(misci_diff = as.factor(misci_diff))
We will now replace missing values with the rounded mean of remaining items if misci_diff = 1 and calculate the total score by taking a sum of all ten items, we will call this “imputed_misci_score.”
<- misci %>%
misci mutate(
raw_misci_score = rowSums(select(., all_of(misci_ten)), na.rm = TRUE)
%>%
) mutate(
mean_misci = case_when(
== 1 ~
misci_diff round(rowMeans(select(., all_of(misci_ten)), na.rm = TRUE))
)%>%
) mutate(across(all_of(misci_ten), ~ if_else(is.na(.), mean_misci, .))) %>%
mutate(
imputed_misci_score = case_when(
== 1 ~ rowSums(select(., all_of(misci_ten)), na.rm = TRUE)
misci_diff
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
misci select(all_of(misci_ten)),
nsets = n_var_miss(misci),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
check distributions of misci total scores before (raw_misci_score) and after imputation (imputed_misci_score)
<- misci %>%
misci_plot filter(misci_diff == 1) %>%
select(guid, raw_misci_score, imputed_misci_score) %>%
pivot_longer(
cols = c("raw_misci_score", "imputed_misci_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(misci_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- misci %>%
misci select(-misci_not_na, -mean_misci, -raw_misci_score)
D.2.15.3 New field name(s)
Add the following field names to the data dictionary:
“misci_diff”, “imputed_misci_score”, “miscishiftactivscl_reverse”, “misciplanningscl_reverse”, “misciexpressscl_reverse” and “miscirightwordsscl_reverse”
# Create field names
<- data.frame(
misci_diff_new_row field_name = "misci_diff",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 8 of 10 MISCI items | 0, if there is a response to less than 8 of 10 items"
)
<- data.frame(
misci_score_new_row field_name = "imputed_misci_score",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "numeric",
field_label = "sum of all ten misci items. Replacing missing values with the rounded mean of remaining items if misci_diff = 1"
)
<- data.frame(
miscishiftactivscl_reverse_new_row field_name = "miscishiftactivscl_reverse",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "numeric",
field_label = "reverse coded miscishiftactivscl"
)
<- data.frame(
misciplanningscl_reverse_new_row field_name = "misciplanningscl_reverse",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "numeric",
field_label = "reverse coded misciplanningscl"
)
<- data.frame(
misciexpressscl_reverse_new_row field_name = "misciexpressscl_reverse",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "numeric",
field_label = "reverse coded misciexpressscl"
)
<- data.frame(
miscirightwordsscl_reverse_new_row field_name = "miscirightwordsscl_reverse",
form_name = "multidimensional_inventory_of_subjective_cognitive",
field_type = "numeric",
field_label = "reverse coded miscirightwordsscl"
)
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!misci_diff_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!misci_score_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!miscishiftactivscl_reverse_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!misciplanningscl_reverse_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!misciexpressscl_reverse_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!miscirightwordsscl_reverse_new_row,
.after = match("miscitotalscore", psy_soc_dict$field_name)
)
D.2.15.4 Save:
Save “misci” and updated data dictionary as .csv files in the folder named “reformatted_misci”
write_csv(
misci,here("data", "Psychosocial", "Reformatted", "reformatted_misci.csv")
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.16 The Big Five Inventory (BFI-2-S)
The BFI-2-S is is used to evaluate five personality domains and 15 specific facet traits within the five domains. Altogether, the BFI-2-S comprises thirty items based on a Likert scale ranging from 1-5, where 1 is “disagree strongly” and 5 is “agree strongly”. Some items are negatively phrased and should be reverse coded for scoring (Soto & John, 2017).
Following table shows the five domains and the specific facet traits:
Domains (bold) and domain specific facets | Items (R: Items should be reverse coded) |
---|---|
Extraversion | |
Sociability | 1R, 16 |
Assertiveness | 6, 21R |
Energy Level | 11, 26R |
Agreeableness | |
Compassion | 2, 17R |
Respectfulness | 7R, 22 |
Trust | 12, 27R |
Conscientiousness | |
Organization | 3R, 18 |
Productiveness | 8R, 23 |
Responsibility | 13, 28R |
Negative Emotionality | |
Anxiety | 4, 19R |
Depression | 9, 24R |
Emotional Volatility | 14R, 29 |
Open-Mindedness | |
Aesthetic Sensitivity | 5, 20R |
Intellectual Curiosity | 10R, 25 |
Creative Imagination | 15, 30R |
D.2.16.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the BFI-2-S form and keep completed forms, we will call this “bfi”
<- applyFilter("bfi", the_big_five_inventory_bfi2s_complete) bfi
We will reverse code the items denoted by “R” in the table. For example, Sociability related question such as “Tends to be quiet” is rated on a scale of 1 to 5 where 1 is “Disagree strongly” and 5 is “Agree strongly”. We will reverse code this to indicate 5 as “Disagree strongly” and 1 as “Agree strongly”. The structure will look as follows:
Original | Reverse coded |
---|---|
1 | 5 |
2 | 4 |
3 | 3 |
4 | 2 |
5 | 1 |
Reverse code the items denoted by “R”: We can accomplish this by subtracting the original values from 6.
<- bfi %>%
bfi mutate(
across(
.cols = c(
bfi2squietscl,
bfi2stakechargescl,
bfi2slessactivescl,
bfi2suncaringscl,
bfi2srudescl,
bfi2sfaultwithothersscl,
bfi2sdisorganizedscl,
bfi2sdifficultstartscl,
bfi2scarelessscl,
bfi2srelaxedscl,
bfi2ssecurescl,
bfi2sstableemotionscl,
bfi2sfewartinterestscl,
bfi2sllittleinterestscl,
bfi2slittlecreativityscl
),.fns = \(x) 6 - x,
.names = "{.col}_reverse"
)
)
# credit: P.Sadil
D.2.16.2 Extraversion
D.2.16.2.1 Missing Data
Missing data pattern in Extraversion domain specific items.
<- c(
extra_vector "bfi2squietscl_reverse",
"bfi2soutgoingscl",
"bfi2sdominantscl",
"bfi2stakechargescl_reverse",
"bfi2senergyscl",
"bfi2slessactivescl_reverse"
)
gg_miss_upset(
%>%
bfi select(all_of(extra_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Scale score is calculated by taking the mean of the domain specific items. We will calculate raw scale score (without imputed items)
<- bfi %>%
bfi mutate(
raw_extraversion_score = round(rowMeans(
select(., all_of(extra_vector)),
na.rm = TRUE
)) )
For missing items, response to the closest item can be used. For instance if “Tends to be quiet” is missing, the value from “Is outgoing, sociable” can be used to replace the missing value or vice versa Berkeley personality lab.
Replace missing items with the closest matching item.
<- bfi %>%
bfi mutate(
bfi2squietscl_reverse = ifelse(
is.na(bfi2squietscl_reverse),
bfi2soutgoingscl,
bfi2squietscl_reverse
),bfi2soutgoingscl = ifelse(
is.na(bfi2soutgoingscl),
bfi2squietscl_reverse,
bfi2soutgoingscl
)%>%
) mutate(
bfi2sdominantscl = ifelse(
is.na(bfi2sdominantscl),
bfi2stakechargescl_reverse,
bfi2sdominantscl
),bfi2stakechargescl_reverse = ifelse(
is.na(bfi2stakechargescl_reverse),
bfi2sdominantscl,
bfi2stakechargescl_reverse
)%>%
) mutate(
bfi2senergyscl = ifelse(
is.na(bfi2senergyscl),
bfi2slessactivescl_reverse,
bfi2senergyscl
),bfi2slessactivescl_reverse = ifelse(
is.na(bfi2slessactivescl_reverse),
bfi2senergyscl,
bfi2slessactivescl_reverse
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
bfi select(all_of(extra_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will now calculate the imputed scale score
<- bfi %>%
bfi mutate(
imputed_extraversion_score = round(rowMeans(
select(., all_of(extra_vector)),
na.rm = TRUE
)) )
check distributions of extraversion scores before (raw_extraversion_score) and after imputation (imputed_extraversion_score)
<- bfi %>%
bfi_ext_plot select(guid, raw_extraversion_score, imputed_extraversion_score) %>%
pivot_longer(
cols = c("raw_extraversion_score", "imputed_extraversion_score"),
values_to = "score",
names_to = "score_name"
%>%
) mutate(score_name = as.factor(score_name)) %>%
mutate(score = as.factor(score)) %>%
group_by(score_name, score) %>%
count() %>%
ungroup() %>%
rename(score_count = n)
ggplot(bfi_ext_plot, aes(x = score, y = score_count, fill = score_name)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 70, vjust = 0.5, hjust = 1))
Remove intermediate field names not needed
<- bfi %>%
bfi select(-raw_extraversion_score)
D.2.16.3 Agreeableness
D.2.16.3.1 Missing Data
Missing data pattern in agreeableness domain specific items.
<- c(
agree_vector "bfi2scompassionatescl",
"bfi2suncaringscl_reverse",
"bfi2srudescl_reverse",
"bfi2srespectfulscl",
"bfi2sassumingbestscl",
"bfi2sfaultwithothersscl_reverse"
)
gg_miss_upset(
%>%
bfi select(all_of(agree_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Scale score is calculated by taking the mean of the domain specific items. We will calculate raw scale score (without imputed items)
<- bfi %>%
bfi mutate(
raw_agreeableness_score = round(rowMeans(
select(., all_of(agree_vector)),
na.rm = TRUE
)) )
For missing items, response to the closest item can be used Berkeley personality lab.
Replace missing items with the closest matching item.
<- bfi %>%
bfi mutate(
bfi2scompassionatescl = ifelse(
is.na(bfi2scompassionatescl),
bfi2suncaringscl_reverse,
bfi2scompassionatescl
),bfi2suncaringscl_reverse = ifelse(
is.na(bfi2suncaringscl_reverse),
bfi2scompassionatescl,
bfi2suncaringscl_reverse
)%>%
) mutate(
bfi2srudescl_reverse = ifelse(
is.na(bfi2srudescl_reverse),
bfi2srespectfulscl,
bfi2srudescl_reverse
),bfi2srespectfulscl = ifelse(
is.na(bfi2srespectfulscl),
bfi2srudescl_reverse,
bfi2srespectfulscl
)%>%
) mutate(
bfi2sassumingbestscl = ifelse(
is.na(bfi2sassumingbestscl),
bfi2sfaultwithothersscl_reverse,
bfi2sassumingbestscl
),bfi2sfaultwithothersscl_reverse = ifelse(
is.na(bfi2sfaultwithothersscl_reverse),
bfi2sassumingbestscl,
bfi2sfaultwithothersscl_reverse
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
bfi select(all_of(agree_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will now calculate the imputed scale score
<- bfi %>%
bfi mutate(
imputed_agreeableness_score = round(rowMeans(
select(., all_of(agree_vector)),
na.rm = TRUE
)) )
check distributions of agreeableness scores before (raw_agreeableness_score) and after imputation (imputed_agreeableness_score)
<- bfi %>%
bfi_agree_plot select(guid, raw_agreeableness_score, imputed_agreeableness_score) %>%
pivot_longer(
cols = c("raw_agreeableness_score", "imputed_agreeableness_score"),
values_to = "score",
names_to = "score_name"
%>%
) mutate(score_name = as.factor(score_name)) %>%
mutate(score = as.factor(score)) %>%
group_by(score_name, score) %>%
count() %>%
ungroup() %>%
rename(score_count = n)
ggplot(bfi_agree_plot, aes(x = score, y = score_count, fill = score_name)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 70, vjust = 0.5, hjust = 1))
Remove intermediate field names not needed
<- bfi %>%
bfi select(-raw_agreeableness_score)
D.2.16.4 Conscientiousness
D.2.16.4.1 Missing Data
Missing data pattern in conscientiousness domain specific items.
<- c(
cons_vector "bfi2sdisorganizedscl_reverse",
"bfi2stidyscl",
"bfi2sdifficultstartscl_reverse",
"bfi2spersistentscl",
"bfi2sreliablescl",
"bfi2scarelessscl_reverse"
)
gg_miss_upset(
%>%
bfi select(all_of(cons_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Scale score is calculated by taking the mean of the domain specific items. We will calculate raw scale score (without imputed items)
<- bfi %>%
bfi mutate(
raw_cons_score = round(rowMeans(
select(., all_of(cons_vector)),
na.rm = TRUE
)) )
For missing items response to the closest item can be used.Berkeley personality lab.
Replace missing items with the closest matching item.
<- bfi %>%
bfi mutate(
bfi2sdisorganizedscl_reverse = ifelse(
is.na(bfi2sdisorganizedscl_reverse),
bfi2stidyscl,
bfi2sdisorganizedscl_reverse
),bfi2stidyscl = ifelse(
is.na(bfi2stidyscl),
bfi2sdisorganizedscl_reverse,
bfi2stidyscl
)%>%
) mutate(
bfi2sdifficultstartscl_reverse = ifelse(
is.na(bfi2sdifficultstartscl_reverse),
bfi2spersistentscl,
bfi2sdifficultstartscl_reverse
),bfi2spersistentscl = ifelse(
is.na(bfi2spersistentscl),
bfi2sdifficultstartscl_reverse,
bfi2spersistentscl
)%>%
) mutate(
bfi2sreliablescl = ifelse(
is.na(bfi2sreliablescl),
bfi2scarelessscl_reverse,
bfi2sreliablescl
),bfi2scarelessscl_reverse = ifelse(
is.na(bfi2scarelessscl_reverse),
bfi2sreliablescl,
bfi2scarelessscl_reverse
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
bfi select(all_of(cons_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will now calculate the imputed scale score
<- bfi %>%
bfi mutate(
imputed_cons_score = round(rowMeans(
select(., all_of(cons_vector)),
na.rm = TRUE
)) )
Check distributions of conscientiousness scores before (raw_cons_score) and after imputation (imputed_cons_score)
<- bfi %>%
bfi_cons_plot select(guid, raw_cons_score, imputed_cons_score) %>%
pivot_longer(
cols = c("raw_cons_score", "imputed_cons_score"),
values_to = "score",
names_to = "score_name"
%>%
) mutate(score_name = as.factor(score_name)) %>%
mutate(score = as.factor(score)) %>%
group_by(score_name, score) %>%
count() %>%
ungroup() %>%
rename(score_count = n)
ggplot(bfi_cons_plot, aes(x = score, y = score_count, fill = score_name)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 70, vjust = 0.5, hjust = 1))
Remove intermediate field names not needed
<- bfi %>%
bfi select(-raw_cons_score)
D.2.16.5 Negative Emotionality
D.2.16.5.1 Missing Data
Missing data pattern in negative emotionality domain specific items.
<- c(
emotion_vector "bfi2sworriesscl",
"bfi2srelaxedscl_reverse",
"bfi2sdepressedscl",
"bfi2ssecurescl_reverse",
"bfi2stemperamentalscl",
"bfi2sstableemotionscl_reverse"
)
gg_miss_upset(
%>%
bfi select(all_of(emotion_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Scale score is calculated by taking the mean of the domain specific items. We will calculate raw scale score (without imputed items)
<- bfi %>%
bfi mutate(
raw_emotional_score = round(rowMeans(
select(., all_of(emotion_vector)),
na.rm = TRUE
)) )
For missing items response to the closest item can be used Berkeley personality lab.
Replace missing items with the closest matching item.
<- bfi %>%
bfi mutate(
bfi2sworriesscl = ifelse(
is.na(bfi2sworriesscl),
bfi2srelaxedscl_reverse,
bfi2sworriesscl
),bfi2srelaxedscl_reverse = ifelse(
is.na(bfi2srelaxedscl_reverse),
bfi2sworriesscl,
bfi2srelaxedscl_reverse
)%>%
) mutate(
bfi2sdepressedscl = ifelse(
is.na(bfi2sdepressedscl),
bfi2ssecurescl_reverse,
bfi2sdepressedscl
),bfi2ssecurescl_reverse = ifelse(
is.na(bfi2ssecurescl_reverse),
bfi2sdepressedscl,
bfi2ssecurescl_reverse
)%>%
) mutate(
bfi2stemperamentalscl = ifelse(
is.na(bfi2stemperamentalscl),
bfi2sstableemotionscl_reverse,
bfi2stemperamentalscl
),bfi2sstableemotionscl_reverse = ifelse(
is.na(bfi2sstableemotionscl_reverse),
bfi2stemperamentalscl,
bfi2sstableemotionscl_reverse
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
bfi select(all_of(emotion_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will now calculate the imputed scale score
<- bfi %>%
bfi mutate(
imputed_emotional_score = round(rowMeans(
select(., all_of(emotion_vector)),
na.rm = TRUE
)) )
Check distributions negative emotionality scores before (raw_emotional_score) and after imputation (imputed_emotional_score).
<- bfi %>%
bfi_emo_plot select(guid, raw_emotional_score, imputed_emotional_score) %>%
pivot_longer(
cols = c("raw_emotional_score", "imputed_emotional_score"),
values_to = "score",
names_to = "score_name"
%>%
) mutate(score_name = as.factor(score_name)) %>%
mutate(score = as.factor(score)) %>%
group_by(score_name, score) %>%
count() %>%
ungroup() %>%
rename(score_count = n)
ggplot(bfi_emo_plot, aes(x = score, y = score_count, fill = score_name)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 70, vjust = 0.5, hjust = 1))
Remove intermediate field names not needed
<- bfi %>%
bfi select(-raw_emotional_score)
D.2.16.6 Open-Mindedness
D.2.16.6.1 Missing Data
Missing data pattern in open-mindedness domain specific items.
<- c(
open_vector "bfi2sfascinatedscl",
"bfi2sfewartinterestscl_reverse",
"bfi2sllittleinterestscl_reverse",
"bfi2sthinkerscl",
"bfi2soriginalscl",
"bfi2slittlecreativityscl_reverse"
)
gg_miss_upset(
%>%
bfi select(all_of(open_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Scale score is calculated by taking the mean of the domain specific items. We will calculate raw scale score (without imputed items)
<- bfi %>%
bfi mutate(
raw_open_score = round(rowMeans(
select(., all_of(open_vector)),
na.rm = TRUE
)) )
For missing items response to the closest item can be used.Berkeley personality lab.
Replace missing items with the closest matching item.
<- bfi %>%
bfi mutate(
bfi2sfascinatedscl = ifelse(
is.na(bfi2sfascinatedscl),
bfi2sfewartinterestscl_reverse,
bfi2sfascinatedscl
),bfi2sfewartinterestscl_reverse = ifelse(
is.na(bfi2sfewartinterestscl_reverse),
bfi2sfascinatedscl,
bfi2sfewartinterestscl_reverse
)%>%
) mutate(
bfi2sllittleinterestscl_reverse = ifelse(
is.na(bfi2sllittleinterestscl_reverse),
bfi2sthinkerscl,
bfi2sllittleinterestscl_reverse
),bfi2sthinkerscl = ifelse(
is.na(bfi2sthinkerscl),
bfi2sllittleinterestscl_reverse,
bfi2sthinkerscl
)%>%
) mutate(
bfi2soriginalscl = ifelse(
is.na(bfi2soriginalscl),
bfi2slittlecreativityscl_reverse,
bfi2soriginalscl
),bfi2slittlecreativityscl_reverse = ifelse(
is.na(bfi2slittlecreativityscl_reverse),
bfi2soriginalscl,
bfi2slittlecreativityscl_reverse
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
bfi select(all_of(open_vector)),
nsets = n_var_miss(bfi),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
We will now calculate the imputed scale score
<- bfi %>%
bfi mutate(
imputed_open_score = round(rowMeans(
select(., all_of(open_vector)),
na.rm = TRUE
)) )
Check distributions of open-mindedness scores before (raw_open_score) and after imputation (imputed_open_score)
<- bfi %>%
bfi_open_plot select(guid, raw_open_score, imputed_open_score) %>%
pivot_longer(
cols = c("raw_open_score", "imputed_open_score"),
values_to = "score",
names_to = "score_name"
%>%
) mutate(score_name = as.factor(score_name)) %>%
mutate(score = as.factor(score)) %>%
group_by(score_name, score) %>%
count() %>%
ungroup() %>%
rename(score_count = n)
ggplot(bfi_open_plot, aes(x = score, y = score_count, fill = score_name)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 70, vjust = 0.5, hjust = 1))
Remove intermediate field names not needed
<- bfi %>%
bfi select(-raw_open_score)
D.2.16.7 New field name(s)
Add the following field names to the data dictionary:
“imputed_extraversion_score”, “imputed_agreeableness_score”, “imputed_cons_score”, “imputed_emotional_score”, “imputed_open_score”, “bfi2squietscl_reverse”, “bfi2stakechargescl_reverse”, “bfi2slessactivescl_reverse”, “bfi2suncaringscl_reverse”, “bfi2srudescl_reverse”, “bfi2sfaultwithothersscl_reverse”, “bfi2sdisorganizedscl_reverse”, “bfi2sdifficultstartscl_reverse”, “bfi2scarelessscl_reverse”, “bfi2srelaxedscl_reverse”, “bfi2ssecurescl_reverse”, “bfi2sstableemotionscl_reverse”, “bfi2sfewartinterestscl_reverse”, “bfi2sllittleinterestscl_reverse”, and “bfi2slittlecreativityscl_reverse”
# Create field names
<- data.frame(
extra_score_new_row field_name = "imputed_extraversion_score",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "Mean of Extraversion domain items. Replacing missing values with the value of the closest matching item (similar meaning)"
)
<- data.frame(
agree_score_new_row field_name = "imputed_agreeableness_score",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "Mean of Agreeableness domain items. Replacing missing values with the value of the closest matching item (similar meaning)"
)
<- data.frame(
cons_score_new_row field_name = "imputed_cons_score",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "Mean of conscientiousness domain items. Replacing missing values with the value of the closest matching item (similar meaning)"
)
<- data.frame(
emo_score_new_row field_name = "imputed_emotional_score",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "Mean of negative emotionality domain items. Replacing missing values with the value of the closest matching item (similar meaning)"
)
<- data.frame(
open_score_new_row field_name = "imputed_open_score",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "Mean of open-mindedness domain items. Replacing missing values with the value of the closest matching item (similar meaning)"
)
<- data.frame(
bfi2squietscl_new_row field_name = "bfi2squietscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2squietscl"
)
<- data.frame(
bfi2stakechargescl_new_row field_name = "bfi2stakechargescl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2stakechargescl"
)
<- data.frame(
bfi2slessactivescl_new_row field_name = "bfi2slessactivescl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2slessactivescl"
)
<- data.frame(
bfi2suncaringscl_new_row field_name = "bfi2suncaringscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2suncaringscl"
)
<- data.frame(
bfi2srudescl_new_row field_name = "bfi2srudescl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2srudescl"
)
<- data.frame(
bfi2sfaultwithothersscl_new_row field_name = "bfi2sfaultwithothersscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sfaultwithothersscl"
)
<- data.frame(
bfi2sdisorganizedscl_new_row field_name = "bfi2sdisorganizedscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sdisorganizedscl"
)
<- data.frame(
bfi2sdifficultstartscl_new_row field_name = "bfi2sdifficultstartscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sdifficultstartscl"
)
<- data.frame(
bfi2scarelessscl_new_row field_name = "bfi2scarelessscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2scarelessscl"
)
<- data.frame(
bfi2srelaxedscl_new_row field_name = "bfi2srelaxedscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2srelaxedscl"
)
<- data.frame(
bfi2ssecurescl_new_row field_name = "bfi2ssecurescl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2ssecurescl"
)
<- data.frame(
bfi2sstableemotionscl_new_row field_name = "bfi2sstableemotionscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sstableemotionscl"
)
<- data.frame(
bfi2sfewartinterestscl_reverse_new_row field_name = "bfi2sfewartinterestscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sfewartinterestscl"
)
<- data.frame(
bfi2sllittleinterestscl_reverse_new_row field_name = "bfi2sllittleinterestscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2sllittleinterestscl"
)
<- data.frame(
bfi2slittlecreativityscl_reverse_new_row field_name = "bfi2slittlecreativityscl_reverse",
form_name = "the_big_five_inventory_bfi2s",
field_type = "numeric",
field_label = "reverse coded bfi2slittlecreativityscl"
)
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!extra_score_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!agree_score_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!cons_score_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!emo_score_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!open_score_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2squietscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2stakechargescl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2slessactivescl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2suncaringscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2srudescl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sfaultwithothersscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sdisorganizedscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sdifficultstartscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2scarelessscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2srelaxedscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2ssecurescl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sstableemotionscl_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sfewartinterestscl_reverse_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2sllittleinterestscl_reverse_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!bfi2slittlecreativityscl_reverse_new_row,
.after = match("bfi2slittlecreativityscl", psy_soc_dict$field_name)
)
D.2.16.8 Save:
Save “bfi” and updated data dictionary as .csv files in the folder named “reformatted_bfi”
write_csv(
bfi,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_bfi.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.17 Danish Thoracic Surgery Questionnaire v0.2
The Danish Thoracic Surgery Questionnaire comprises 17 items to assess functional impairment experienced post thoracotomy. each item is rated on a scale of 0 to 4, where 0 indicates no impairment due to pain and 4 indicates complete impairement due to pain. There is an additional indicator “I never do this activity” for each item. The total score is calculated as the sum of all seventeen items[Ringsted et al. (2013)](Bayman, 2017)
D.2.17.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the The Danish Thoracic Surgery Questionnaire data and keep completed forms. Keeping completed forms will subset to the thoracotomy cohort by default, we will call “dts”
<- applyFilter("dts", danish_thoracic_surgery_questionnaire_v02_complete) dts
D.2.17.2 Missing Data:
Missing data pattern in dts.
gg_miss_upset(
dts,nsets = n_var_miss(dts),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Handling missing data:
In case of missing values, a total score can be calculated if responses to 13 of 17 items are available (0.29 x 17 = 4.93). The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the 17 items(refer to section 1.c).
We will create a variable “dts_diff” and assign a value of 1 if there is a response to more than or equal to 13 of 17 DTS items and 0 if there is a response to less than 13 of 17 DTS items.
<- c(
dts_vector "dtsrunningscl",
"dtscarryingscl",
"dtswashingscl",
"dtscleaningscl",
"dtswalkingscl",
"dtsstairsscl",
"dtskneelingscl",
"dtsstandingscl",
"dtsoutofbedscl",
"dtsswimmingscl",
"dtscyclingscl",
"dtsdrivingscl",
"dtslyingopsidescl",
"dtscoughingscl",
"dtssittingscl",
"dtswatchtvscl",
"dtssleepscl"
)
<- dts %>%
dts mutate(dts_not_na = rowSums(!is.na(.[, dts_vector]))) %>%
mutate(
dts_diff = case_when(
>= 13 ~ 1,
dts_not_na TRUE ~ 0
)%>%
) as_tibble() %>%
mutate(dts_diff = as.factor(dts_diff))
We will now replace missing values with the rounded mean of remaining items if dts_diff = 1 i.e if 13 of 17 items are available and calculate the total score by taking a sum of all the items. We will exclude responses indicated by a “96” i.e. “I never do this activity” from all calculations.
<- dts %>%
dts mutate(
raw_dts_score = rowSums(
select(., all_of(dts_vector)) * (select(., all_of(dts_vector)) != 96),
na.rm = TRUE
)%>%
) mutate(
mean_dts = case_when(
== 1 ~
dts_diff round(rowMeans(
select(., all_of(dts_vector)) * (select(., all_of(dts_vector)) != 96),
na.rm = TRUE
))
)%>%
) mutate(across(all_of(dts_vector), ~ if_else(is.na(.), mean_dts, .))) %>%
mutate(
imputed_dts_score = case_when(
== 1 ~
dts_diff rowSums(
select(., all_of(dts_vector)) * (select(., all_of(dts_vector)) != 96),
na.rm = TRUE
)
) )
Check missingness pattern after imputation
gg_miss_upset(
%>%
dts select(-imputed_dts_score, -mean_dts),
nsets = n_var_miss(dts),
order.by = "degree",
point.size = 1,
line.size = 0.25,
text.scale = c(0.5)
)
Check distributions of DTS scores before (raw_dts_score) and after imputation (imputed_dts_score)
<- dts %>%
dts_plot filter(between(dts_not_na, 13, 17)) %>%
select(guid, raw_dts_score, imputed_dts_score) %>%
pivot_longer(
cols = c("raw_dts_score", "imputed_dts_score"),
values_to = "score",
names_to = "score_name"
)
ggplot(dts_plot, aes(x = score_name, y = score, fill = score_name)) +
geom_violin(alpha = .25)
Remove intermediate field names not needed
<- dts %>%
dts select(-dts_not_na, -mean_dts, raw_dts_score)
D.2.17.3 New field name(s):
Add the field names “dts_diff” and “imputed_dts_score”to the data dictionary
# Create field names
<- data.frame(
dts_diff_new_row field_name = "dts_diff",
form_name = "danish_thoracic_surgery_questionnaire_v02",
field_type = "factor",
select_choices_or_calculations = "1, if there is a response to more than or equal to 13 of 17 DTS items|0, if there is a response to less than 13 of 16 DTS items"
)
<- data.frame(
dts_score_new_row field_name = "imputed_dts_score",
form_name = "danish_thoracic_surgery_questionnaire_v02",
field_type = "numeric",
field_label = "A higher score indicates a higher degree of functional impairment due to pain",
field_note = "The missing responses are first replaced with the average score of the completed items and then a total score is calculated by adding the scores for all the items, responses indicated by 96 were excluded from the total score"
)
# adding fabq_diff
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!dts_diff_new_row,
.after = match("dtssleepscl", psy_soc_dict$field_name)
)
# imputed fabq score
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!dts_score_new_row,
.after = match("dtssleepscl", psy_soc_dict$field_name)
)
D.2.17.4 Save:
Save “dts” and updated data dictionary as .csv files in the folder named “reformatted_dts”
write_csv(
dts,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_dts.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )
D.2.18 Rapid Assessment of Physical Activity v1.0 (RAPA)
The Rapid Assessment of Physical Activity Questionnaire comprises 9 items to assess physical activity in subject over 50 years of age. The first 7 items measure aerobic activity and the last 2 items measure strength/flexibility. Each item has a yes/no response (Topolski et al., 2006). Here we well compute aerobic activity score based on the highest affirmative response.
D.2.18.1 Read in Data:
Read in psy_soc1 dataframe and select field names from the RAPA and keep completed forms, we will call “rapa”
<- applyFilter(
rapa "rapa",
rapid_assessment_of_physical_activity_v10_rapa_complete%>%
) retype()
Missing values cannot be imputed, we well compute aerobic activity score based on the highest affirmative response.
<- rapa %>%
rapa mutate(
rapa_rarly1 = case_when(
== 1 ~ 1,
rapa_rarly TRUE ~ rapa_rarly
),rapa_light1 = case_when(
== 1 ~ 2,
rapa_light TRUE ~ rapa_light
),rapa_lightweekly1 = case_when(
== 1 ~ 3,
rapa_lightweekly TRUE ~ rapa_lightweekly
),rapa_mod1 = case_when(
== 1 ~ 4,
rapa_mod TRUE ~ rapa_mod
),rapa_vig1 = case_when(
== 1 ~ 4,
rapa_vig TRUE ~ rapa_vig
),rapa_modweekly1 = case_when(
== 1 ~ 5,
rapa_modweekly TRUE ~ rapa_modweekly
),rapa_vigweekly1 = case_when(
== 1 ~ 5,
rapa_vigweekly TRUE ~ rapa_vigweekly
),rapa_aerobic_score = pmax(
rapa_rarly1,
rapa_light1,
rapa_lightweekly1,
rapa_mod1,
rapa_vig1,
rapa_modweekly1,
rapa_vigweekly1,na.rm = TRUE
)%>%
) mutate(
rapa_aerobic_category = case_when(
== 1 ~ "sedentary",
rapa_aerobic_score == 2 ~ "underactive",
rapa_aerobic_score == 3 ~ "underactive_regular-light_activities",
rapa_aerobic_score == 4 ~ "underactive_regular",
rapa_aerobic_score == 5 ~ "active",
rapa_aerobic_score == 0 ~ "answered_no_to_all_aerobic_items",
rapa_aerobic_score TRUE ~ NA
)%>%
) mutate(rapa_aerobic_category = as.factor(rapa_aerobic_category))
We will create a variable “rapa_aerobic_missing” and assign a value of 1 if “rapa_aerobic_score” is missing and 0 if “rapa_aerobic_score” is available
<- rapa %>%
rapa mutate(
rapa_aerobic_missing = case_when(is.na(rapa_aerobic_score) ~ 1, TRUE ~ 0)
)
Check number of missing aerobic score
table(rapa$rapa_aerobic_missing)
0 1
1360 2
D.2.18.2 New field name(s)
Add the field names “rapa_aerobic_score” and “rapa_aerobic_missing” to the data dictionary
# Create field names
<- data.frame(
aerobic_score_new_row field_name = "rapa_aerobic_score",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
select_choices_or_calculations = "0, if answered_no_to_all_aerobic_itemse|1,Sedentary|2,Underactive|3, Underactive_regular-light_activities|4,Underactive_regular|5,Active"
)
<- data.frame(
aerobic_category_new_row field_name = "rapa_aerobic_category",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "Factor",
select_choices_or_calculations = "if answered_no_to_all_aerobic_itemse|Sedentary|Underactive|Underactive_regular-light_activities|Underactive_regular|Active"
)
<- data.frame(
aerobic_missing_new_row field_name = "rapa_aerobic_missing",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
select_choices_or_calculations = "1 if rapa_aerobic_score is missing|0, if rapa_aerobic_score is not missing"
)#################
<- data.frame(
rapa1_new_row field_name = "rapa_rarly1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = " recoded as 1 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa2_new_row field_name = "rapa_light1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 2 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa3_new_row field_name = "rapa_lightweekly1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 3 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa4_new_row field_name = "rapa_mod1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 4 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa5_new_row field_name = "rapa_vig1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 4 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa6_new_row field_name = "rapa_modweekly1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 5 if subject's highest affirmative response for aerobic activity related questions"
)
<- data.frame(
rapa7_new_row field_name = "rapa_vigweekly1",
form_name = "rapid_assessment_of_physical_activity_v10_rapa",
field_type = "numeric",
field_label = "recoded as 5 if subject's highest affirmative response for aerobic activity related questions"
)
####
# adding rows
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!aerobic_score_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!aerobic_category_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!aerobic_missing_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa1_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa2_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa3_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa4_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa5_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa6_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
<- psy_soc_dict %>%
psy_soc_dict add_row(
!!!rapa7_new_row,
.after = match("rapa_2score", psy_soc_dict$field_name)
)
D.2.18.3 Save:
Save “rapa” and updated data dictionary as .csv files in the folder named “reformatted_rapa”
write_csv(
rapa,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"reformatted_rapa.csv"
)
)write_csv(
psy_soc_dict,file = here::here(
"data",
"Psychosocial",
"Reformatted",
"updated_psy_soc_dictionary.csv"
) )