Appendix A — Exclude Scans with “Red” Quality

For each MRI product, all pipeline derivatives have been included. This means that products were included regardless of their quality, and so some products are known to have poor quality (rated “red” or incomparable). This kit walks through one way to use those quality ratings to exclude low-quality FreeSurfer products. For details on how the ratings were generated, see Section 4.1.2.

library(fs)
library(readr)
library(dplyr)
library(stringr)

First, start by loading the table of FreeSurfer outputs. In this example, the cortical parcellations will be used.

# Load the table of FreeSurfer cortical parcellation outputs and preview it.
aparc <- read_tsv(file = "data/aparc.tsv")
aparc
# A tibble: 399,992 × 14
   StructName       NumVert SurfArea GrayVol ThickAvg ThickStd MeanCurv GausCurv
   <chr>              <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
 1 bankssts            1360      909    2055     2.52    0.397    0.112    0.02 
 2 caudalanteriorc…     852      604    1626     2.25    0.887    0.131    0.024
 3 caudalmiddlefro…    2870     1857    4018     2.16    0.485    0.104    0.018
 4 cuneus              1595     1090    2019     1.74    0.515    0.141    0.027
 5 entorhinal           480      306    1366     3.03    0.816    0.105    0.018
 6 fusiform            3434     2414    8326     2.91    0.603    0.123    0.023
 7 inferiorparietal    4984     3387    7491     2.06    0.508    0.122    0.022
 8 inferiortemporal    3816     2614    8405     2.79    0.552    0.118    0.021
 9 isthmuscingulate    1167      782    1820     2.14    0.832    0.117    0.021
10 lateraloccipital    6159     4023    8578     2.02    0.536    0.132    0.027
# ℹ 399,982 more rows
# ℹ 6 more variables: FoldInd <dbl>, CurvInd <dbl>, sub <dbl>, ses <chr>,
#   hemisphere <chr>, parc <chr>

The overall quality ratings (red/yellow/green) for the rawdata are included in the *scans.tsv files. For example:

$ cat sub-10003/ses-V1/sub-10003_ses-V1_scans.tsv 
filename    rating
func/sub-10003_ses-V1_task-cuff_run-01_bold.nii.gz  green
dwi/sub-10003_ses-V1_dwi.nii.gz green
func/sub-10003_ses-V1_task-rest_run-01_bold.nii.gz  green
func/sub-10003_ses-V1_task-rest_run-02_bold.nii.gz  green
anat/sub-10003_ses-V1_T1w.nii.gz    green
fmap/sub-10003_ses-V1_acq-fmrib0_dir-AP_epi.nii.gz  n/a
fmap/sub-10003_ses-V1_acq-fmrib0_dir-PA_epi.nii.gz  n/a
fmap/sub-10003_ses-V1_acq-dwib0_dir-AP_epi.nii.gz   n/a
fmap/sub-10003_ses-V1_acq-dwib0_dir-PA_epi.nii.gz   n/a

To load the files, first list them all with fs::dir_ls.

# List only the per-session BIDS scans tables. The narrower glob keeps any
# other TSV that may sit in the directory from being read alongside them.
scan_files <- dir_ls("data/scans", glob = "*_scans.tsv")
head(scan_files)
data/scans/sub-10003_ses-V1_scans.tsv data/scans/sub-10005_ses-V1_scans.tsv 
data/scans/sub-10008_ses-V1_scans.tsv data/scans/sub-10010_ses-V1_scans.tsv 
data/scans/sub-10011_ses-V1_scans.tsv data/scans/sub-10013_ses-V1_scans.tsv 

They can be read and bound together using readr::read_tsv. Note that the BIDS way to represent null values is with 'n/a'.

# Read every listed scans file into a single tibble, treating the BIDS
# null marker "n/a" as a missing value.
scans <- scan_files |>
  read_tsv(na = "n/a")
scans
# A tibble: 9,188 × 2
   filename                                           rating
   <chr>                                              <chr> 
 1 func/sub-10003_ses-V1_task-cuff_run-01_bold.nii.gz green 
 2 dwi/sub-10003_ses-V1_dwi.nii.gz                    green 
 3 func/sub-10003_ses-V1_task-rest_run-01_bold.nii.gz green 
 4 func/sub-10003_ses-V1_task-rest_run-02_bold.nii.gz green 
 5 anat/sub-10003_ses-V1_T1w.nii.gz                   green 
 6 fmap/sub-10003_ses-V1_acq-fmrib0_dir-AP_epi.nii.gz <NA>  
 7 fmap/sub-10003_ses-V1_acq-fmrib0_dir-PA_epi.nii.gz <NA>  
 8 fmap/sub-10003_ses-V1_acq-dwib0_dir-AP_epi.nii.gz  <NA>  
 9 fmap/sub-10003_ses-V1_acq-dwib0_dir-PA_epi.nii.gz  <NA>  
10 anat/sub-10005_ses-V1_T1w.nii.gz                   yellow
# ℹ 9,178 more rows

Ratings will be used to exclude some FreeSurfer products, and so we only need the ratings for the T1w scans.

# Keep only the rows whose filename contains "T1w".
t1w_ratings <- filter(scans, str_detect(filename, "T1w"))

In this example, all scans rated “red” will be excluded, so filter for those scans.

# Subset the T1w ratings to the scans rated "red", then show them.
red_t1w <- filter(t1w_ratings, rating == "red")
red_t1w
# A tibble: 8 × 2
  filename                         rating
  <chr>                            <chr> 
1 anat/sub-10144_ses-V1_T1w.nii.gz red   
2 anat/sub-10216_ses-V1_T1w.nii.gz red   
3 anat/sub-10258_ses-V1_T1w.nii.gz red   
4 anat/sub-10267_ses-V1_T1w.nii.gz red   
5 anat/sub-10553_ses-V1_T1w.nii.gz red   
6 anat/sub-10559_ses-V1_T1w.nii.gz red   
7 anat/sub-20040_ses-V1_T1w.nii.gz red   
8 anat/sub-20224_ses-V1_T1w.nii.gz red   

The FreeSurfer rows are defined by the entities sub and ses, so extract these from the filename column.

# Pull the BIDS entities out of each filename so the ratings can be matched
# to FreeSurfer rows. Both patterns use a lookbehind on the entity key so
# that only the value following "sub-" / "ses-" is captured.
red_t1w_w_entities <- red_t1w |>
  mutate(
    # Five-digit participant label, converted to a number for joining.
    sub = str_extract(filename, "(?<=sub-)[[:digit:]]{5}") |>
      as.integer(),
    # Session label (V1 or V3), anchored to the "ses-" key rather than
    # matching the first "V1"/"V3" found anywhere in the path.
    ses = str_extract(filename, "(?<=ses-)V[13]")
  )

Finally, this table with red ratings can be used to exclude FreeSurfer outputs from sessions with a low-quality T1w scan.

# Drop every aparc row whose (sub, ses) pair appears in the red-rated table.
aparc_wo_red <- anti_join(
  aparc,
  red_t1w_w_entities,
  by = join_by(sub, ses)
)
aparc_wo_red
# A tibble: 396,776 × 14
   StructName       NumVert SurfArea GrayVol ThickAvg ThickStd MeanCurv GausCurv
   <chr>              <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
 1 bankssts            1360      909    2055     2.52    0.397    0.112    0.02 
 2 caudalanteriorc…     852      604    1626     2.25    0.887    0.131    0.024
 3 caudalmiddlefro…    2870     1857    4018     2.16    0.485    0.104    0.018
 4 cuneus              1595     1090    2019     1.74    0.515    0.141    0.027
 5 entorhinal           480      306    1366     3.03    0.816    0.105    0.018
 6 fusiform            3434     2414    8326     2.91    0.603    0.123    0.023
 7 inferiorparietal    4984     3387    7491     2.06    0.508    0.122    0.022
 8 inferiortemporal    3816     2614    8405     2.79    0.552    0.118    0.021
 9 isthmuscingulate    1167      782    1820     2.14    0.832    0.117    0.021
10 lateraloccipital    6159     4023    8578     2.02    0.536    0.132    0.027
# ℹ 396,766 more rows
# ℹ 6 more variables: FoldInd <dbl>, CurvInd <dbl>, sub <dbl>, ses <chr>,
#   hemisphere <chr>, parc <chr>