Appendix F — Functional CRF Data Quality Checks

Authors

Martin Lindquist

Briha Ansari

This document goes over the quality checks for the Functional Testing Case Report Form (CRF). The headings in the sidebar help the user navigate to their desired content.

F.1 Read in Data and write functions

F.1.1 Load Libraries

library(shiny)
library(forcats)
library(tidyverse)
library(here)
library(hablar)
library(janitor)
library(gt)

F.1.2 Function

Write a function to remove columns where all rows are NA; this will remove duplicate columns for the Thoracotomy/TKA cohort.

# Column predicate for select(where(...)): TRUE when `x` holds at least one
# non-missing value, FALSE when every element is NA (or `x` is empty).
not_all_na <- function(x) {
  !all(is.na(x))
}

F.2 CRF Quality checks

F.2.1 functional-testing Assessment Form

F.2.2 Read in data

We will call this func

# Base read.csv() is used on purpose: readr::read_csv() produced parsing
# errors on this export. hablar::retype() then re-infers the column types.
func <- retype(
  read.csv(
    here("data", "functional-testing", "functional-testing-2024-11-06.csv")
  )
)

Remove test records

# Record IDs that identify test records and must be excluded from all checks.
test_records <- c(
  "10000",
  "15000",
  "20000",
  "25000",
  "40000",
  "50000",
  "60000",
  "70000",
  "80000",
  "90000",
  "100000",
  "110000",
  "120000"
)

# Drop the test records. The IDs are compared as integers rather than text:
# when a numeric record_id is matched against character IDs, R converts the
# numbers to text first, and a double 100000 becomes "1e+05", which would not
# match "100000". Integer IDs match correctly whether record_id is integer,
# double or character.
func <- func %>%
  filter(!record_id %in% as.integer(test_records))

Create a column for cohort type called “cohort”

# Derive the surgical cohort from the record_id range. The parentheses make
# the existing precedence explicit (`&` binds tighter than `|`): IDs in
# [10000, 15000) or >= 25000 are TKA; everything else, including rows where
# the comparison is NA, falls through to "Thoracic".
func <- mutate(
  func,
  cohort = case_when(
    (record_id >= 10000 & record_id < 15000) | record_id >= 25000 ~ "TKA",
    TRUE ~ "Thoracic"
  )
)

F.2.3 Data Dictionary

Read in data dictionary and remove duplicate field names

# Load the data dictionary and keep only the first row for each field_name
# (all other columns of that row are retained).
func_dict <- distinct(
  read_csv(
    here(
      "data",
      "functional-testing",
      "functional-testing-Data-Dictionary-2024-11-06.csv"
    )
  ),
  field_name,
  .keep_all = TRUE
)

F.2.4 New field name(s):

Add the field name “cohort” to the data dictionary

# Dictionary entry describing the derived `cohort` column.
cohort_new_row <- data.frame(
  field_name = "cohort",
  field_type = "Character",
  field_note = "Type of surgical cohort",
  select_choices_or_calculations = "TKA,Thoracic"
)

# Append the entry as the last row of the data dictionary. add_row() appends
# at the end by default, so the former `slice(1:nrow(.))` no-op and the
# explicit `.after = nrow(.)` are not needed.
func_dict <- func_dict %>%
  add_row(!!!cohort_new_row)

F.2.5 TKA Functional Assessment

# Build the TKA functional-testing table in one pipeline:
#   1. keep identifiers, the walk/5TSTS fields, completion status and cohort;
#   2. keep completed (status 2) repeat instances of the functional_testing
#      instrument for the TKA cohort;
#   3. per record and event, keep the row(s) with the highest repeat instance;
#   4. drop columns that are entirely NA.
tka_func <- func %>%
  select(
    record_id,
    guid,
    redcap_data_access_group,
    redcap_event_name,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    starts_with("walk"),
    starts_with("tsts"),
    functional_testing_complete,
    cohort
  ) %>%
  filter(
    cohort == "TKA",
    redcap_repeat_instrument == "functional_testing",
    functional_testing_complete == 2,
    !is.na(redcap_repeat_instance)
  ) %>%
  group_by(record_id, redcap_event_name) %>%
  top_n(1, redcap_repeat_instance) %>%
  ungroup() %>%
  select(where(not_all_na))

Keep records of subjects who completed the test.

# Completed 10m walk tests on completed forms; input to walk-test flags
# 1-3, 5 and 6.
frdata3 <- filter(
  tka_func,
  walk10completeyn == 1,
  functional_testing_complete == 2
)

F.2.5.1 10m Walk Test:

F.2.5.1.1 Flag 1:

Check for discrepancy or missing values in the first and the second initial pain ratings.

# Flag 1 (10m walk): keep completed tests where the two recorded initial pain
# ratings differ, or where either rating is missing (difference is NA).
ferror1 <- frdata3 %>%
  filter(
    is.na(walk10initialpainscl - walk10initialpainscl1) |
      walk10initialpainscl - walk10initialpainscl1 != 0
  ) %>%
  mutate(
    error_type = "Walk test:Discrepancy or missing values in the first and the second initial pain ratings",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.1.2 Flag 2:

Check for discrepancy or missing values in the first and the second final pain ratings.

# Flag 2 (10m walk): keep completed tests where the two recorded final pain
# ratings differ, or where either rating is missing (difference is NA).
# The label now says "final": it previously repeated the Flag 1 "initial"
# label, so both flags were merged into a single column of the error report.
ferror2 <- frdata3 %>%
  mutate(final_pain_diff = walk10finalpainscl - walk10finalpainscl1) %>%
  filter(final_pain_diff != 0 | is.na(final_pain_diff)) %>%
  add_column(
    error_type = "Walk test:Discrepancy or missing values in the first and the second final pain ratings"
  ) %>%
  add_column(errors = "error") %>%
  select(
    record_id,
    redcap_data_access_group,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    error_type,
    errors
  )
F.2.5.1.3 Flag 3:

Check for missing values or discrepancy between the first and the second walk time.

# Flag 3 (10m walk): keep completed tests where the two recorded walk times
# differ, or where either time is missing. retype() is applied first, as in
# the original, so the time columns are re-typed before subtraction.
ferror3 <- retype(frdata3) %>%
  filter(
    is.na(walk10time - walk10time1) |
      walk10time - walk10time1 != 0
  ) %>%
  mutate(
    error_type = "Walk test:Missing values or discrepancy between the first and the second walk time",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.1.4 Flag 4:

Check if the reason for test not completed was not specified.

# Flag 4 (10m walk): tests marked as not completed for which no reason was
# recorded. The label now states what the check detects (reason missing);
# the previous wording, "was marked off", read as the opposite.
ferror4 <- tka_func %>%
  filter(walk10completeyn == 0) %>%
  filter(is.na(walk10incompletereason)) %>%
  add_column(
    error_type = "Walk test:If the reason for test not completed was not specified"
  ) %>%
  add_column(errors = "error") %>%
  select(
    record_id,
    redcap_data_access_group,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    error_type,
    errors
  )
F.2.5.1.5 Flag 5:

Check for records with missing values for any assistance if the walk test was completed.

# Flag 5 (10m walk): completed tests with no answer to the "any assistance"
# question.
ferror5 <- frdata3 %>%
  filter(walk10completeyn == 1, is.na(walk10assistyn)) %>%
  mutate(
    error_type = "Walk test:missing values for any assistance if the walk test was completed",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.1.6 Flag 6:

For subjects who used any assistance during the walk test, check for records with type of assistance left “unchecked”.

# Flag 6 (10m walk): assistance was reported (walk10assistyn == 1) but every
# assistance-type checkbox is 0, i.e. no type was ticked.
ferror6 <- frdata3 %>%
  filter(
    walk10assistyn == 1,
    walk10assist_cane___1 == 0,
    walk10assist_crutch___1 == 0,
    walk10assist_perssuppt___1 == 0,
    walk10assist_other___1 == 0,
    walk10assist_walkder___1 == 0
  ) %>%
  mutate(
    error_type = "Walk test:type of assistance unchecked",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )

F.2.5.2 5 times sit to stand test (5tsts)

F.2.5.2.1 Flag 7:

Check for missing bp values if 5tsts was completed.

# Working copy of the TKA table for the 5TSTS checks.
frdata.sit <- tka_func

# Flag 7 (5TSTS): the test is marked completed but the BP screen value is
# missing.
ferror.bp <- frdata.sit %>%
  filter(tstscompleteyn == 1, is.na(tstsbpscreen)) %>%
  mutate(
    error_type = "5tsts:missing bp values if 5tsts was completed",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )

keep records for subjects who completed the test.

# Completed 5TSTS tests on completed forms; input to 5TSTS flags 8-10, 13
# and 14.
frdata.sit1 <- filter(
  frdata.sit,
  tstscompleteyn == 1,
  functional_testing_complete == 2
)
F.2.5.2.2 Flag 8:

Check for discrepancy or missing values in the first and the second initial pain rating.

# Flag 8 (5TSTS): keep completed tests where the two recorded initial (pre)
# pain ratings differ, or where either rating is missing.
ferror1.sit <- frdata.sit1 %>%
  filter(
    is.na(tstsprepainscl - tstsprepainscl1) |
      tstsprepainscl - tstsprepainscl1 != 0
  ) %>%
  mutate(
    error_type = "5tsts:discrepancy or missing values for the first and the second initial pain rating ",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.3 Flag 9:

Check for discrepancy or missing values in the first and the second final pain rating.

# Flag 9 (5TSTS): keep completed tests where the two recorded final (post)
# pain ratings differ, or where either rating is missing.
ferror2.sit <- frdata.sit1 %>%
  filter(
    is.na(tstspostpainscl - tstspostpainscl1) |
      tstspostpainscl - tstspostpainscl1 != 0
  ) %>%
  mutate(
    error_type = "5tsts:discrepancy or missing values for the first and the second final pain rating ",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.4 Flag 10:

Check for discrepancy or missing values in the first and the second activity time.

# Flag 10 (5TSTS): keep completed tests where the two recorded activity times
# differ, or where either time is missing. retype() is applied first, as in
# the original, so the time columns are re-typed before subtraction.
ferror3.sit <- retype(frdata.sit1) %>%
  filter(
    is.na(tststime - tststime1) |
      tststime - tststime1 != 0
  ) %>%
  mutate(
    error_type = "5tsts:discrepancy or missing values the first and the second activity time",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.5 Flag 11:

Check if the reason for test not completed was not specified.

# Flag 11 (5TSTS): tests marked as not completed for which the reason field
# (tstsnonreasonyn) is missing.
ferror4.sit <- frdata.sit %>%
  filter(tstscompleteyn == 0, is.na(tstsnonreasonyn)) %>%
  mutate(
    error_type = "5tsts:if the reason for test not completed was not specified",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.6 Flag 12:

Check if the test was not completed but was initiated, and the number of reps completed (n/5) was not specified.

# Flag 12 (5TSTS): the test was not completed, tstsnonreasonyn == 1
# (described in the text as "initiated"), and the number of reps completed
# is missing.
# The flag value is "error", like every other flag; it was previously "none",
# which made flagged records show "none" in the error report.
ferror5.sit <- frdata.sit %>%
  filter(tstscompleteyn == 0) %>%
  filter(tstsnonreasonyn == 1) %>%
  filter(is.na(tstsnumbrepsyn)) %>%
  add_column(
    error_type = "5tsts:If the test was not completed but was Initiated, and the number of reps completed (n/5) were not specified"
  ) %>%
  add_column(errors = "error") %>%
  select(
    record_id,
    redcap_data_access_group,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    error_type,
    errors
  )
F.2.5.2.7 Flag 13:

Check for records with missing values for any assistance if the test was completed.

# Flag 13 (5TSTS): completed tests with no answer to the "any assistance"
# question.
ferror6.sit <- frdata.sit1 %>%
  filter(tstscompleteyn == 1, is.na(tstsassistyn)) %>%
  mutate(
    error_type = "5tsts:missing values for any assistance if the test was completed",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.8 Flag 14:

For subjects who used any assistance during the 5TSTS test, check for records with type of assistance left “unchecked”.

# Flag 14 (5TSTS): assistance was reported (tstsassistyn == 1) but both
# assistance-type checkboxes are 0, i.e. no type was ticked.
ferror7.sit <- frdata.sit1 %>%
  filter(
    tstsassistyn == 1,
    tstsassist_1___1 == 0,
    tstsassist_2___1 == 0
  ) %>%
  mutate(
    error_type = "5tsts:type of assistance unchecked",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.5.2.9 Biomarkers for the TKA cohort
F.2.5.2.9.1 Movement Evoked Pain (MEP):

MEP refers to pain that occurs or is worsened by movement (Berardi et al., 2022). MEP can not be computed if any of the pain ratings are missing.

Berardi, G., Frey-Law, L., Sluka, K. A., Bayman, E. O., Coffey, C. S., Ecklund, D., Vance, C. G. T., Dailey, D. L., Burns, J., Buvanendran, A., McCarthy, R. J., Jacobs, J., Zhou, X. J., Wixson, R., Balach, T., Brummett, C. M., Clauw, D., Colquhoun, D., Harte, S. E., … Wandner, L. D. (2022). Multi-site observational study to assess biomarkers for susceptibility or resilience to chronic pain: The acute to chronic pain signatures (A2CPS) study protocol. Frontiers in Medicine, 9. https://doi.org/10.3389/fmed.2022.849214
F.2.5.2.9.2 10m Walk Test MEP:

MEP 10m walk test= Final pain rating - Initial pain rating

# Movement-evoked pain for the 10m walk: final minus initial pain rating.
# The result is NA whenever either rating is missing.
tka_func <- mutate(
  tka_func,
  mep_walk = walk10finalpainscl - walk10initialpainscl
)
F.2.5.2.9.3 5TSTS Test MEP:

MEP 5TSTS test = Final pain rating - Initial pain rating

# Movement-evoked pain for the 5TSTS: final (post) minus initial (pre) pain
# rating. The result is NA whenever either rating is missing.
tka_func <- mutate(
  tka_func,
  mep_5tsts = tstspostpainscl - tstsprepainscl
)
F.2.5.2.10 New field name(s):

Add field names for the computed biomarkers to the Functional Testing data dictionary

# Dictionary entries describing the two computed MEP biomarkers.
walk_mep_new_row <- data.frame(
  field_name = "mep_walk",
  field_type = "numeric",
  field_note = "Final pain rating - initial pain rating (10m walk test) "
)

tsts_mep_new_row <- data.frame(
  field_name = "mep_5tsts",
  field_type = "numeric",
  field_note = "Final pain rating - initial pain rating (5TSTS test)"
)

# Append both entries to the end of the data dictionary, walk test first.
# add_row() appends at the end by default, so the former `slice(1:nrow(.))`
# no-op and the explicit `.after = nrow(.)` are not needed.
func_dict <- func_dict %>%
  add_row(!!!walk_mep_new_row) %>%
  add_row(!!!tsts_mep_new_row)
F.2.5.2.11 Create functional assessment error report for the TKA cohort.
# Prefix shared by the names of all TKA flag tables (ferror1, ferror.bp,
# ferror1.sit, ...).
func_error <- "ferror"

# Collect every object in the current environment whose name starts with the
# prefix. Any unrelated object with the same prefix would be picked up too.
func_list <- mget(ls(pattern = paste0("^", func_error)))

# Stack the flag tables, spread them to one column per error type, and blank
# out the cells with no error. Only character columns are touched: replacing
# NA with "" is not type-compatible with the numeric identifier columns, and
# across() replaces the superseded mutate_all().
tka_func_report <- bind_rows(func_list) %>%
  pivot_wider(names_from = "error_type", values_from = "errors") %>%
  mutate(across(where(is.character), ~ replace_na(.x, "")))
# Render the TKA error report as a gt table: bold title, 12px fonts, and a
# light grey fill on the record_id column.
gt(tka_func_report) %>%
  tab_header(title = md("**TKA Functional Assessment Error Report**")) %>%
  tab_options(
    column_labels.font.size = px(12),
    table.font.size = px(12)
  ) %>%
  tab_style(
    locations = cells_body(columns = record_id),
    style = list(cell_fill(color = "#F4F4F4"))
  )
TKA Functional Assessment Error Report
record_id redcap_data_access_group redcap_repeat_instrument redcap_repeat_instance 5tsts:missing bp values if 5tsts was completed Walk test:Discrepancy or missing values in the first and the second initial pain ratings 5tsts:discrepancy or missing values for the first and the second initial pain rating 5tsts:discrepancy or missing values for the first and the second final pain rating Walk test:Missing values or discrepancy between the first and the second walk time 5tsts:discrepancy or missing values the first and the second activity time Walk test:missing values for any assistance if the walk test was completed 5tsts:missing values for any assistance if the test was completed 5tsts:type of assistance unchecked
10135 uchicago functional_testing 1 error
10308 uchicago functional_testing 1 error error
10341 uchicago functional_testing 1 error
25014 university_of_mich functional_testing 1 error
25048 university_of_mich functional_testing 1 error
25051 university_of_mich functional_testing 1 error
25053 university_of_mich functional_testing 1 error error
25081 university_of_mich functional_testing 1 error
25090 university_of_mich functional_testing 2 error error
25095 university_of_mich functional_testing 1 error
25158 university_of_mich functional_testing 1 error
25166 university_of_mich functional_testing 1 error
25171 university_of_mich functional_testing 2 error
25224 university_of_mich functional_testing 1 error
10055 uchicago functional_testing 1 error
10363 uchicago functional_testing 1 error error
10696 uchicago functional_testing 1 error error
10040 uchicago functional_testing 1 error error error
10147 uchicago functional_testing 1 error
10688 northshore functional_testing 1 error error error
10598 uchicago functional_testing 1 error
25141 university_of_mich functional_testing 1 error
10321 uchicago functional_testing 1 error
10436 uchicago functional_testing 1 error
25108 university_of_mich functional_testing 2 error
25204 university_of_mich functional_testing 1 error
25013 university_of_mich functional_testing 1 error
F.2.5.2.12 Save:

Save “tka_func” and the data dictionary as .csv files in the folder named “Reformatted” (inside “data/functional-testing”).

# Write the reformatted TKA table to data/functional-testing/Reformatted.
write_csv(
  tka_func,
  here::here(
    "data", "functional-testing", "Reformatted", "reformatted_tka_func.csv"
  )
)

# Write the updated data dictionary next to it.
write_csv(
  func_dict,
  here::here(
    "data", "functional-testing", "Reformatted", "updated_func_dict.csv"
  )
)

F.2.6 Thoracotomy cohort Functional Assessment:

# Build the thoracotomy functional-testing table in one pipeline:
#   1. keep identifiers, the ftdbc* fields, completion status and cohort;
#   2. keep completed (status 2) repeat instances of the
#      functional_testing_mcc2_v01 instrument for the Thoracic cohort;
#   3. per record and event, keep the row(s) with the highest repeat instance;
#   4. drop columns that are entirely NA.
thor_func <- func %>%
  select(
    record_id,
    guid,
    redcap_data_access_group,
    redcap_event_name,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    starts_with("ftdbc"),
    functional_testing_mcc2_v01_complete,
    cohort
  ) %>%
  filter(
    cohort == "Thoracic",
    redcap_repeat_instrument == "functional_testing_mcc2_v01",
    functional_testing_mcc2_v01_complete == 2,
    !is.na(redcap_repeat_instance)
  ) %>%
  group_by(record_id, redcap_event_name) %>%
  top_n(1, redcap_repeat_instance) %>%
  ungroup() %>%
  select(where(not_all_na))

Keep records of subjects who completed the test.

# Thoracotomy records where the test was completed; input to flags 1-3.
m2frfunc3 <- filter(thor_func, ftdbctestcmpltyn == 1)
F.2.6.0.1 Flag 1:

Check for discrepancy or missing values in the first and the second initial pain ratings.

# Flag 1 (deep breathing & coughing): keep completed tests where the two
# recorded initial pain ratings differ, or where either rating is missing.
m2frerror1 <- m2frfunc3 %>%
  filter(
    is.na(ftdbcdeepbrthinitscl - ftdbcdeepbrthinitscl2) |
      ftdbcdeepbrthinitscl - ftdbcdeepbrthinitscl2 != 0
  ) %>%
  mutate(
    error_type = "Deep breathing & coughing:Discrepancy or missing values in the first and the second initial pain ratings",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.6.0.2 Flag 2:

Check for discrepancy or missing values in the first and the second final pain ratings.

# Flag 2 (deep breathing & coughing): keep completed tests where the two
# recorded final pain ratings differ, or where either rating is missing.
# The label now says "final": it previously repeated the Flag 1 "initial"
# label, so both flags would share a single column of the error report.
m2frerror2 <- m2frfunc3 %>%
  mutate(final_pain_diff = ftdbcdeepbrthfinalscl - ftdbcdeepbrthfinalscl2) %>%
  filter(final_pain_diff != 0 | is.na(final_pain_diff)) %>%
  add_column(
    error_type = "Deep breathing & coughing:Discrepancy or missing values in the first and the second final pain ratings"
  ) %>%
  add_column(errors = "error") %>%
  select(
    record_id,
    redcap_data_access_group,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    error_type,
    errors
  )
F.2.6.0.3 Flag 3:

Check for missing values or discrepancy between the first and the second cough pain.

# Flag 3 (deep breathing & coughing): keep completed tests where the two
# recorded cough pain ratings differ, or where either rating is missing.
m2frerror3 <- m2frfunc3 %>%
  filter(
    is.na(ftdbccoughfinalscl - ftdbccoughfinalscl2) |
      ftdbccoughfinalscl - ftdbccoughfinalscl2 != 0
  ) %>%
  mutate(
    error_type = "Deep breathing & coughing: Missing values or discrepancy between the first and the second cough pain",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.6.0.4 Flag 4:

Check if the reason for test not completed was not specified.

# Flag 4 (deep breathing & coughing): tests marked as not completed for
# which the reason field (ftdbctestcmpltno) is missing.
m2frerror4 <- thor_func %>%
  filter(ftdbctestcmpltyn == 0, is.na(ftdbctestcmpltno)) %>%
  mutate(
    error_type = "If the reason for test not completed was not specified",
    errors = "error"
  ) %>%
  select(
    record_id, redcap_data_access_group, redcap_repeat_instrument,
    redcap_repeat_instance, error_type, errors
  )
F.2.6.0.5 Flag 5:

Check if the “functional_testing_mcc2_v01_complete” is missing but the test completion status is available.

# Flag 5 (deep breathing & coughing): the test completion answer is present
# but the form status (functional_testing_mcc2_v01_complete) is missing.
# The check runs on `func`, not `thor_func`: thor_func only contains rows
# with functional_testing_mcc2_v01_complete == 2, so a missing status could
# never be found there. The other thor_func row filters (cohort, instrument,
# non-missing repeat instance) are reproduced so the flag covers the same
# population.
m2frerror5 <- func %>%
  filter(
    cohort == "Thoracic",
    redcap_repeat_instrument == "functional_testing_mcc2_v01",
    !is.na(redcap_repeat_instance)
  ) %>%
  filter(!is.na(ftdbctestcmpltyn)) %>%
  filter(is.na(functional_testing_mcc2_v01_complete)) %>%
  add_column(error_type = "missing functional_testing_mcc2_v01_complete") %>%
  add_column(errors = "error") %>%
  select(
    record_id,
    redcap_data_access_group,
    redcap_repeat_instrument,
    redcap_repeat_instance,
    error_type,
    errors
  )
F.2.6.0.6 Create functional assessment error report for the Thoracotomy cohort.
# Prefix shared by the names of all thoracotomy flag tables (m2frerror1-5).
m2func_error <- "m2frerror"

# Collect every object in the current environment whose name starts with the
# prefix. Any unrelated object with the same prefix would be picked up too.
m2func_list <- mget(ls(pattern = paste0("^", m2func_error)))

# Stack the flag tables, spread them to one column per error type, and blank
# out the cells with no error. Only character columns are touched: replacing
# NA with "" is not type-compatible with the numeric identifier columns, and
# across() replaces the superseded mutate_all().
thor_func_report <- bind_rows(m2func_list) %>%
  pivot_wider(names_from = "error_type", values_from = "errors") %>%
  mutate(across(where(is.character), ~ replace_na(.x, "")))
# Render the thoracotomy error report as a gt table. The title only says
# "No Errors" when the report has no rows, so it cannot contradict the table
# once flagged records appear in a later data export.
thor_func_report %>%
  gt() %>%
  tab_header(
    title = md(
      if (nrow(thor_func_report) == 0) {
        "**Thoracotomy Cohort Functional Assessment Error Report: No Errors**"
      } else {
        "**Thoracotomy Cohort Functional Assessment Error Report**"
      }
    )
  ) %>%
  tab_options(
    table.font.size = px(12),
    column_labels.font.size = px(12)
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#F4F4F4")),
    locations = cells_body(columns = record_id)
  )
Thoracotomy Cohort Functional Assessment Error Report: No Errors
record_id redcap_data_access_group redcap_repeat_instrument redcap_repeat_instance
F.2.6.0.7 Biomarkers for the Thoracic cohort:

Post baseline data is needed to compute MEP for the thoracic cohort, hence could not be computed.

F.2.6.0.8 Save:

Save “thor_func” and the data dictionary as .csv files in the folder named “Reformatted” (inside “data/functional-testing”).

# Write the reformatted thoracotomy table to
# data/functional-testing/Reformatted.
write_csv(
  thor_func,
  here::here(
    "data", "functional-testing", "Reformatted", "reformatted_thor_func.csv"
  )
)

# Write the data dictionary again; this targets the same
# updated_func_dict.csv path as the TKA save step and overwrites that file.
write_csv(
  func_dict,
  here::here(
    "data", "functional-testing", "Reformatted", "updated_func_dict.csv"
  )
)