TIDAL Code


Download an R markdown script of the code below

Overview

Introduction

This script is to demonstrate some of the code that happens in the background of the application, using the simulated dataset and a simple linear model.

Data Formatting

Upload data in wide format and convert to long format

# Load in wide format dataframe

dataWide <- read.csv("data/emot_reg_emot_simulated.csv")

# Gather sdq_t columns into long format (this is a questionnaire score at each time point)
dataScore <- dataWide %>%
                gather(score_cat_test_col, score, all_of(c("sdq_t1", "sdq_t2", "sdq_t3", "sdq_t4", "sdq_t5"))) 

# Gather age_t columns into long format (this is age at each time point)
dataLong <- dataWide %>%
              gather(time_point, age, all_of(c("age_t1", "age_t2", "age_t3", "age_t4", "age_t5")))

# Merge them into one dataframe
dataLong[,"score"] <- dataScore[,"score"]

# Have a look at the dataframe
head(dataLong)
##   subject sdq_t1 sdq_t2 sdq_t3 sdq_t4 sdq_t5 female er_total_t1 er_total_t2
## 1       2      0      2     NA      0      0      1           1           6
## 2       3      2      3      1      3      2      0           4           4
## 3       4      0      0     NA      0      0      0           4           2
## 4       5      0      0      0      2     NA      0           4           5
## 5       6      3      1      1      1      1      0           2           2
## 6       8      0      1      1      4      2      0           5           2
##   er_t1_bin er_t2_bin er_bin time_point      age score
## 1         0         0      0     age_t1 3.041096     0
## 2         0         0      0     age_t1 3.126027     2
## 3         0         0      0     age_t1 3.172603     0
## 4         0         0      0     age_t1 3.049315     0
## 5         0         0      0     age_t1 3.065753     3
## 6         0         0      0     age_t1 3.027397     0

Data Exploration

Choose variables for modelling

# This is similar to the drop down selections on the Data Exploration page.
ID <- "subject"
traj <- "score"
age <- "age"
modelType <- "Linear"
timePoint <- "time_point"
# Mean center age (this helps the model fit)
dataLong <- dataLong %>%
          mutate(age_original = as.numeric(!!sym(age)) ) %>%
          mutate(!!sym(age) := as.numeric(!!sym(age)) - mean( as.numeric(!!sym(age)), na.rm = T ))

Descriptive Statistics

# Table:
dataLong %>%
          group_by(across( !!timePoint )) %>%
          summarise(N = sum(!is.na( !!sym(traj) )),
                    mean = mean(!!sym(traj), na.rm = T),
                    SD = sd(!!sym(traj), na.rm = T),
                    median = median(!!sym(traj), na.rm = T),
                    IQR = IQR(!!sym(traj), na.rm = T)
          )
## # A tibble: 5 × 6
##   time_point     N  mean    SD median   IQR
##   <chr>      <int> <dbl> <dbl>  <dbl> <dbl>
## 1 age_t1     12637  1.41  1.55      1     2
## 2 age_t2     12682  1.37  1.59      1     2
## 3 age_t3     10855  1.52  1.77      1     2
## 4 age_t4     10296  1.89  2.03      1     3
## 5 age_t5      9070  2.04  2.14      1     3
# Plot
df.plot <- dataLong %>%
          group_by(across( !!timePoint )) %>%
          summarise(Age = mean(age_original, na.rm = T),
                    Phenotype = mean(!!sym(traj), na.rm = T),
                    SD = sd(!!sym(traj), na.rm = T),
                    n = sum(!is.na( !!sym(traj) ))) %>%
          mutate(upper = Phenotype + ( qnorm(0.975)*SD/sqrt(n) ),
                 lower = Phenotype - ( qnorm(0.975)*SD/sqrt(n) ))

ggplot(df.plot,aes(x=Age, y=Phenotype)) +
  geom_point()+
  geom_line() +
  geom_errorbar(aes(ymin = lower, ymax = upper))+
  ylab(paste0("Score (", traj, ")")) +
  xlab("Age")

Model Results

Run the model using lme4

# Run the model using lmer
fit <- lmer(formula = "score ~ age + (1 + age | subject)",
                          REML=F ,
                          data = dataLong,
                          control=lmerControl(optimizer="bobyqa",
                                              optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: score ~ age + (1 + age | subject)
##    Data: dataLong
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  208467.1  208520.6 -104227.5  208455.1     55531 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7416 -0.5239 -0.1961  0.4038  5.7814 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  subject  (Intercept) 1.4036   1.1847       
##           age         0.0223   0.1493   0.47
##  Residual             1.5628   1.2501       
## Number of obs: 55537, groups:  subject, 12720
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 1.618345   0.011963  135.28
## age         0.065644   0.002055   31.95
## 
## Correlation of Fixed Effects:
##     (Intr)
## age 0.323

N observations and groups

paste0("The number of observations (measurements) is ",
               format(summary(fit)$devcomp$dims[[1]], big.mark=",", scientific=FALSE),
               " and the number of groups (people) is ",
               format(summary(fit)$ngrps[[1]], big.mark=",", scientific=FALSE) ,
               ".")
## [1] "The number of observations (measurements) is 55,537 and the number of groups (people) is 12,720."

Fixed effects

cbind(
  tidy(fit, "fixed"),
  confint(fit, "beta_", method = "Wald")) %>%
  mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
  mutate(p.z = ifelse(p.z <= 0, "p < 0.001", round(p.z, 3)))
##             effect        term   estimate   std.error statistic      2.5 %
## (Intercept)  fixed (Intercept) 1.61834513 0.011962990 135.27931 1.59489810
## age          fixed         age 0.06564445 0.002054591  31.95014 0.06161753
##                 97.5 %       p.z
## (Intercept) 1.64179216 p < 0.001
## age         0.06967138 p < 0.001

Random effects

randomDF <- as.data.frame(VarCorr(fit),
              order = "lower.tri") %>%
              mutate(across(where(is.numeric), round, 3))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(where(is.numeric), round, 3)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
randomDF
##      Level   Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1  subject (Intercept)      <NA>               1.404                  1.185
## 2  subject (Intercept)       age               0.083                  0.468
## 3  subject         age      <NA>               0.022                  0.149
## 4 Residual        <NA>      <NA>               1.563                  1.250

Deviance

# This is the model fit, you can compare this to similar models.
deviance(fit)
## [1] 208455.1

Plot

# Add the "prediction"/model col to dataframe (adjustedScore)
# The alternative is to use the "predict" function, but this didn't work when users wanted to add categorical covariates.
# We had to calculate it manually instead. Below is just an example of how you do this for a linear model with no covariates.

# Pull out the mean centered age column
ageVec <- dataLong %>% pull(!!age)

# Get the coefficients from the model
coef <- summary(fit)$coefficients

# Calculate the model mean/prediciton. This is age multiplied by the slope plus the intercept.
adjustedScore <- ageVec * coef[2,1] + coef[1,1]

# Add this as a new column to the dataframe
modelDataEdit <- dataLong %>%
                  mutate(pred = adjustedScore)

# Add 95% CIs for these estimates using glht
ageOrig <- dataLong %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

# Get a vector of the ages we want to calculate 95% CIs for (instead of doing all the ages in the column)
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )

# Go through each age and calculate the 95% CI
score_glht <- lapply(ageCalcs, function(x){

    # Get the row names of the coefficients to put in the glht function (this is Intercept and age)
    rowNames <- rownames(summary(fit)$coefficients)
    
    # Round age to 3 decimal places
    ageInput <- round(x - mean(ageOrig), 3)

    # Use glht to get 95% CIs
    # Essentially we are doing the intercept + slope * age == 0 as it is a linear model
    res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0")) )

    # Tidy up the dataframe
    res <- tidy(confint(res))
    rowname <- paste0("Score (", traj, ")")
    res <-  res %>%
              mutate(contrast = paste0(rowname, " (95% CIs)")) %>%
              column_to_rownames(var = "contrast") %>%
              mutate(across(where(is.numeric), round, 2))
          
    return(res)
})

# Put everything into one dataframe
estimate <- do.call(rbind, score_glht) %>%
                      mutate(age = ageCalcs)

# Plot the mean trajectory and it's 95% CIs and the underlying descriptive data:
ggplot() +
          geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred), color = "#1D86C7", linewidth = 1.5, na.rm=T) +
          geom_ribbon(data = estimate, aes(x= age , ymin = conf.low, ymax = conf.high), fill = "#1D86C7", alpha = 0.2, na.rm = T) +
          geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
          geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
          geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
          ylab(paste0("Score (", traj, ")")) +
          xlab("Age")

Scores at ages

# Choose some ages to calculate (these are equivalent to the checkboxes on the app)
ageInput <- c(4, 7, 12)

# Go through each age and let's calculate the score at that age using glht (similar to getting 95% CIs as above)
score <- lapply(as.numeric(ageInput), function(x){

          # Get the row names that correspond to the coefficients, ie. Intercept, Age (slope)
          rowNames <- rownames(summary(fit)$coefficients)

          # Round age and mean center it so it's on the correct scale
          ageInputRound <- round(x - mean(ageOrig), 3)
        
          # Essentially we are doing the intercept + slope * age == 0 as it is a linear model
          res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInputRound, " == 0")) )

          # Tidy up the data frame by reformatting it and labelling it sensibly
          res <- tidy(confint(res))

          rowname <- paste0("Score (", traj, ")")

          res <-  res %>%
            mutate(contrast = paste0(rowname, " (95% CIs)")) %>%
            column_to_rownames(var = "contrast")
})

# Tidy it up into one dataframe
estimate <- lapply(score, function(df) {
   df %>%
     dplyr::select(estimate)
 })  %>% do.call(cbind, .)

colnames(estimate) <- ageInput
estimate <- estimate %>%
          gather(age, score, 1:ncol(estimate)) %>%
          mutate(age = as.numeric(age))

# Put the CIs into separate dataframes ready for plotting
conf.low <- lapply(score, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInput
conf.low  <- conf.low %>%
          gather(age, conf.low, 1:ncol(conf.low)) %>%
          mutate(age = as.numeric(age))

conf.high <- lapply(score, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInput
conf.high  <- conf.high %>%
          gather(age, conf.high, 1:ncol(conf.high)) %>%
          mutate(age = as.numeric(age))%>%
          dplyr::select(-age)

conf <- cbind(conf.low, conf.high, "age")

ggplot() +
  geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred  ) , na.rm=T) +
  theme(legend.text = element_text(color = "black")) +
  geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
  geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
  ylab(paste0("Score (", traj, ")")) +
  xlab("Age")

# Tidy up the estimate and 95% CIs to display nicely to the user
estimateCI <- lapply(score, function(df) {
  df %>%
    mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
    dplyr::select(estimateCI)
})  %>% do.call(cbind, .)
colnames(estimateCI) <- ageInput
estimateCI        
##                                            4                    7
## Score (score) (95% CIs) 1.383 (1.36 - 1.406) 1.58 (1.557 - 1.603)
##                                            12
## Score (score) (95% CIs) 1.908 (1.875 - 1.942)

Area under the curve

# Choose 2 ages to calculate AUC for (this is the slider on the app)
AUCages <- c(4,10)

# Calculate the AUC for the ages the user has chosen
coef <- summary(fit)$coefficients

# Pull out the age column from the dataframe
ageOrig <- modelDataEdit %>%
              pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

# Minus mean age so the age is on the same scale as that used in the model
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)

# Remove any characters that means the deltaMethod function wont work
rowNames <- rownames(coef) %>%
              str_remove_all("I|\\(|\\^|\\)")

# Use the delta method to calculate AUC 
# AUC is calculate by doing age2 (the larger age) * intercept + slope * age2^2/2 minus age1 (the younger age) * intercept + slope * age2^2/2. This is for a linear model.
AUC <- car::deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)

# Format the results
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")

# Plot the results
ggplot() +
          geom_ribbon(data = modelDataEdit,
                      aes(x = age_original, ymax = pred, ymin = 0),
                      alpha = 0.1, show.legend = FALSE, fill = "#1D86C7") +
          geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred ) , na.rm=T)+
          coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
          scale_colour_discrete(na.translate = F) +
          theme(legend.text = element_text(color = "black")) +
          ylab(paste0("Score (", traj, ")")) +
          xlab("Age") +
          scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
                             expand = c(0, 0)) +
          scale_y_continuous(expand = c(0, 0))

# Format the results to display to the user
df <- t(data.frame(paste0(AUCages[1], " - ", AUCages[2]),
                     AUC))

rowname <- paste0("AUC (", traj, ") (95% CIs)")
rownames(df) <- c("Age Range", rowname)
df
##                       [,1]                
## Age Range             "4 - 10"            
## AUC (score) (95% CIs) "9.48 (9.34 - 9.62)"

Interaction Variable (factor with two levels)

Choose variable

# Demo shown for categorical variable
# Split the trajectory by the sex variable
condition <- "female"

# Get the index for the column we want to split the trajectory on
colSplit <- which(colnames(dataLong) %in% condition)

# Make sure this column is a factor and replace some characters that may cause a problem with an underscore
modelData <- dataLong %>%
            filter(!is.na(!!sym(condition))) %>%
            mutate(!!condition := factor( str_replace_all(.[[colSplit]], " |-|/", "_") ) ) 

Model Results

# Run the model using lmer
fit <- lmer(formula = "score ~ age + female + age*female + (1 + age | subject)",
                          REML=F ,
                          data = modelData,
                          control=lmerControl(optimizer="bobyqa",
                                              optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: score ~ age + female + age * female + (1 + age | subject)
##    Data: modelData
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  208365.6  208437.0 -104174.8  208349.6     55529 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7675 -0.5272 -0.1934  0.4059  5.7681 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  subject  (Intercept) 1.39424  1.1808       
##           age         0.02194  0.1481   0.46
##  Residual             1.56320  1.2503       
## Number of obs: 55537, groups:  subject, 12720
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 1.526786   0.016785  90.961
## age         0.047477   0.002888  16.441
## female1     0.184427   0.023863   7.729
## age:female1 0.036473   0.004093   8.911
## 
## Correlation of Fixed Effects:
##             (Intr) age    femal1
## age          0.322              
## female1     -0.703 -0.226       
## age:female1 -0.227 -0.705  0.319
# Fixed effects
cbind(
            tidy(fit, "fixed"),
            confint(fit, "beta_", method = "Wald")) %>%
            mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
            mutate(p.z = ifelse(p.z < 0.001, "p < 0.001", round(p.z, 3) ))
##             effect        term   estimate   std.error statistic      2.5 %
## (Intercept)  fixed (Intercept) 1.52678648 0.016785153 90.960536 1.49388818
## age          fixed         age 0.04747730 0.002887690 16.441272 0.04181753
## female1      fixed     female1 0.18442702 0.023862626  7.728697 0.13765713
## age:female1  fixed age:female1 0.03647303 0.004093226  8.910585 0.02845046
##                 97.5 %       p.z
## (Intercept) 1.55968477 p < 0.001
## age         0.05313707 p < 0.001
## female1     0.23119691 p < 0.001
## age:female1 0.04449561 p < 0.001
# Random effects
randomDF <- as.data.frame(VarCorr(fit),
                        order = "lower.tri") %>%
            mutate(across(where(is.numeric), round, 3))
          colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
          randomDF
##      Level   Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1  subject (Intercept)      <NA>               1.394                  1.181
## 2  subject (Intercept)       age               0.081                  0.464
## 3  subject         age      <NA>               0.022                  0.148
## 4 Residual        <NA>      <NA>               1.563                  1.250
# add the "predicted" column to this dataset (it's not really a prediction because its the same dataset, it just shows the model)

# Pull out the age column
ageVec <- modelData %>% pull(!!age)

# Get the coefficients of the model
coef <- summary(fit)$coefficients

# Get the predicted column (note we did this manually in case covariates were added). If covariates were added then essentially this predicted "zero" variable is when the covariates are set to zero.

zero <- ageVec * coef[2,1] + coef[1,1]

# Get the row index from the coefficients table that correspond to the condition we are splitting on
rowIndex <- which(str_detect(string = row.names(coef),
                             pattern = condition) &
                  str_detect(string = row.names(coef),
                             pattern = ":", negate = T))

# Same as above but for the interaction term
rowIndexInteract1 <- which(str_detect(string = row.names(coef),
                                              pattern = condition) &
                                     str_detect(string = row.names(coef),
                                                pattern = ":") &
                                     str_detect(string = row.names(coef),
                                                pattern = "\\^", negate = T))

# How many levels does out condition have, eg. 2 if options are only male or female in this example
 n <- length(unique(pull(modelData, !!sym(condition))))
 
 # Calculate the prediction for the other level, ie. when the covariate is not set to zero
 predCovs <- lapply(1:(n-1), function(i){

              coef[1,1] +
              (ageVec * coef[2,1]) +
              coef[rowIndex[i],1] +
              (ageVec * coef[rowIndexInteract1[i],1])
 })

# Name of level, in this case female is coded as 0 or 1
num <- str_subset(row.names(coef), condition) %>%
        sub(paste0(".*", condition), "", .) %>%
        unique()

# Name the list by the the level it corresponds to
names(predCovs) <- paste0(condition, "_", num)

# Add these as new columns to the dataset and make a pred column which gives the correct predicted value for the corresponding condition level, ie. the female predicted value if the participant is female.
modelDataEdit <- cbind(modelData, do.call(cbind, predCovs)) %>%
            mutate(zero = zero) %>%
            mutate(!!condition := as.factor(.[[colSplit]]) ) %>%
            mutate(pred =  eval(parse(text =
                                        paste0(paste0("ifelse(", condition, " == '", num, "', ", condition, "_",num,",", collapse = " "), "zero",
                                               paste0(rep(")", length(num)), collapse = ""), collapse = "")
            )))

Plot

# use glht to calculate 95% CIs

# Pull out the age column      
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

# Determine which ages we want to calculate 95% CI for
# Don't need to do it for every participant as age when age is fairly similar, instead do it as a sequence of 0.5 for every unit in age.
# This speeds up the calculation time
ageCalcs <-  c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )

# Go through each age and calculate 95% CI
CI_glht  <- lapply(ageCalcs, function(x){

          # Get number of levels in condition
          n <- length(unique(pull(modelDataEdit, !!sym(condition))))

          # Get corresponding rows for condition in coefficients table
          rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                       pattern = condition) &
                              str_detect(string = row.names(summary(fit)$coefficients),
                                         pattern = ":", negate = T))

          rowNames <- rownames(summary(fit)$coefficients)

          ageInput <- round(x - mean(ageOrig), 3)

          equations <- sapply(1:(n-1), function(i){
                paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " + \`", rowNames[rowIndex[i]] , "\` + \`", rowNames[2], ":", rowNames[rowIndex[i]],"\`*",ageInput , " == 0")
              })
          res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " == 0"), equations) )
          
          # Tidy results dataframe and rename rows/columns
          res <- tidy(confint(res))

          rowname <- paste0("Score (", traj, ")")
          
          levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

            res <-  res %>%
              mutate(condition = levelNames)
          
            })

ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
ageCalcs <-  c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

estimate <- do.call(rbind, CI_glht) %>%
          mutate(age = rep(ageCalcs, each = length(levelNames)) ) 
ggplot() +
              geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred, color = !!sym(condition) ) , na.rm=T) +
              geom_ribbon(data = estimate ,
                          aes(x= age , ymin = conf.low, ymax = conf.high, fill = condition), alpha = 0.2, na.rm = T, show.legend = FALSE) +
              scale_color_manual(values = c("orange", "blue")) +
              scale_fill_manual(values = c("orange", "blue")) +
              geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
              geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
              geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
              theme(legend.text = element_text(color = "black"))+
              ylab(paste0("Score (", traj, ")")) +
              xlab("Age")

Scores at ages

# Select ages to calculate scores for (same as the checkboxes on the app)
ageInputScore <- c(4, 9, 14)

# use glht to calculate scores at ages

ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

score_glht <- lapply(as.numeric(ageInputScore), function(x){

          n <- length(unique(pull(modelDataEdit, !!sym(condition))))

          rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                       pattern = condition) &
                              str_detect(string = row.names(summary(fit)$coefficients),
                                         pattern = ":", negate = T))

          rowNames <- rownames(summary(fit)$coefficients)

          ageInput <- round(x - mean(ageOrig), 3)

          equations <- sapply(1:(n-1), function(i){
                paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", rowNames[rowIndex[i]] , " + ", rowNames[2], ":", rowNames[rowIndex[i]],"*",ageInput , " == 0")
              })
            res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0"), equations) )
 
          # Tidy results dataframe and rename rows/columns
          res <- tidy(confint(res))

          rowname <- paste0("Score (", traj, ")")

          levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

          res <-  res %>%
            mutate(contrast = paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)")) %>%
            column_to_rownames(var = "contrast")
})

# Plot the scores at the given age        
estimate <- lapply(score_glht, function(df) {
            df %>%
              dplyr::select(estimate)
          })  %>% do.call(cbind, .)

colnames(estimate) <- ageInputScore

estimate <- estimate %>%
              gather(age, score, 1:ncol(estimate)) %>%
              mutate(age = as.numeric(age))

conf.low <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInputScore
conf.low  <- conf.low %>%
              gather(age, conf.low, 1:ncol(conf.low)) %>%
              mutate(age = as.numeric(age))

conf.high <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInputScore
conf.high  <- conf.high %>%
              gather(age, conf.high, 1:ncol(conf.high)) %>%
              mutate(age = as.numeric(age))%>%
              dplyr::select(-age)

conf <- cbind(conf.low, conf.high, "age")

# Plot
ggplot() +
   geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred, color = !!sym(condition) ) , na.rm=T) +
   theme(legend.text = element_text(color = "black")) +
   geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
   geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
   ylab(paste0("Score (", traj, ")")) +
   xlab("Age")

# Table
estimateCI <- lapply(score_glht, function(df) {
            df %>%
              mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
              dplyr::select(estimateCI)
          })  %>% do.call(cbind, .)
colnames(estimateCI) <- ageInputScore
estimateCI
##                                                                  4
## Score (score) [female, level = 0 ] (95% CIs)  1.357 (1.32 - 1.394)
## Score (score) [female, level = 1 ] (95% CIs) 1.411 (1.373 - 1.448)
##                                                                  9
## Score (score) [female, level = 0 ] (95% CIs) 1.594 (1.553 - 1.636)
## Score (score) [female, level = 1 ] (95% CIs)  1.83 (1.789 - 1.872)
##                                                                 14
## Score (score) [female, level = 0 ] (95% CIs) 1.832 (1.767 - 1.896)
## Score (score) [female, level = 1 ] (95% CIs)  2.25 (2.186 - 2.315)
# Difference in scores at ages (with 95% CIs)
levelsScores <- c(0,1)
differenceScores <- lapply(as.numeric(ageInputScore), function(x){
            ageInput <- round(x - mean(ageOrig), 3)

            coef <- summary(fit)$coefficients

            rowIndex <- which(str_detect(string = row.names(coef),
                                         pattern = condition) &
                                str_detect(string = row.names(coef),
                                           pattern = ":", negate = T))

            rowNames <- rownames(coef) %>%
              str_remove_all("I|\\(|\\^|\\)|\\:")

            levelNames <- paste0(condition,levelsScores) %>%
              str_remove_all("I|\\(|\\^|\\)|\\:")

            levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]

            res <- deltaMethod(fit, c(paste0(
                  "(", rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", levelNames1[1], " + ", levelNames1[2], "*", ageInput, ") - (",rowNames[1], " + ", rowNames[2], "*", ageInput, ")"
                )), parameterNames = rowNames )

            dif <- paste0( round(res$Estimate, 3), " (", round(res$`2.5 %`,3), " - ", round(res$`97.5 %`,3), ")")
            res <- data.frame(age = dif)
return(res)
})   

difTab <- do.call(cbind, differenceScores)
colnames(difTab) <- ageInputScore
rownames(difTab) <- paste0("Difference between ",levelsScores[1]," and ", levelsScores[2]," (95% CI)")
difTab
##                                                       4                     9
## Difference between 0 and 1 (95% CI) 0.054 (0.007 - 0.1) 0.236 (0.185 - 0.288)
##                                                        14
## Difference between 0 and 1 (95% CI) 0.419 (0.339 - 0.498)

Area under the curve

# Select ages to calculate AUC for (same as the slider bar on the app)
AUCages <- c(4,10)

coef <- summary(fit)$coefficients

ageOrig <- modelDataEdit %>%
          pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)

rowNames <- rownames(coef) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

AUC <- deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)

AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
        
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                   pattern = condition) &
                          str_detect(string = row.names(summary(fit)$coefficients),
                                     pattern = ":", negate = T))

rowIndexInteract1 <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                            pattern = condition) &
                                   str_detect(string = row.names(summary(fit)$coefficients),
                                              pattern = ":") &
                                   str_detect(string = row.names(summary(fit)$coefficients),
                                              pattern = "\\^", negate = T))
      
n <- length(unique(pull(modelDataEdit, !!sym(condition))))

AUCCovs <- lapply(1:(n-1), function(i){
         
            AUC <- deltaMethod(fit,
                               c( paste0(
                                 "(((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age2,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age2,")^2/2) ) -
                                   (((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age1,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age1,")^2/2) )"
                               ) )
                               , parameterNames = rowNames )

            AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
            AUC
        })
        
AUC_delta <- list(AUC = AUC, AUCCovs = AUCCovs)
       
# Plot
ggplot(data = modelDataEdit) +
            geom_ribbon(data = modelDataEdit,
                        aes(x = age_original, ymax = pred, ymin = 0, fill = !!sym(condition)),
                        alpha = 0.1, show.legend = FALSE) +
            coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
            geom_line(aes(x = age_original, y = pred, color = !!sym(condition)), na.rm = TRUE) +
            theme(legend.text = element_text(color = "black")) +
            ylab(paste0("Score (", traj, ")")) +
            xlab("Age") +
            scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
                               expand = c(0, 0))+
            scale_y_continuous(expand = c(0, 0))

# Table
df <- t(
            cbind(
              data.frame(paste0(AUCages[1], " - ", AUCages[2]),
                         AUC_delta$AUC),
              do.call(cbind, AUC_delta$AUCCovs)
            )
)

rowname <- paste0("AUC (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, condition))))
rownames(df) <- c("Age Range", paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)") )
df
##                                            [,1]                 
## Age Range                                  "4 - 10"             
## AUC (score) [female, level = 0 ] (95% CIs) "9 (8.8 - 9.19)"     
## AUC (score) [female, level = 1 ] (95% CIs) "9.97 (9.78 - 10.17)"
# -------
# Difference in AUC
levelsAUC <- c(0,1)

levelNames <- paste0(condition,levelsAUC) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

coef <- summary(fit)$coefficients

ageOrig <- modelDataEdit %>%
          pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)

rowNames <- rownames(coef) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]

res <- deltaMethod(fit, c( paste0(" (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2)   + ( (",levelNames1[1],")*(",age2,") ) + ((",levelNames1[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2)   + ( (",levelNames1[1],")*(",age1,") ) + ((",levelNames1[2],")*(",age1,")^2/2) )) - (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2)     )) ") ) ,parameterNames = rowNames)
    
dif <- paste0( round(res$Estimate, 2), " 95% CI: (", round(res$`2.5 %`,2), " - ", round(res$`97.5 %`,2), ")")
statement <- paste0("The difference between the two factor levels (for the age ranges ", AUCages[1], " - ", AUCages[2] ,") is ", dif ,".")
statement
## [1] "The difference between the two factor levels (for the age ranges 4 - 10) is 0.98 95% CI: (0.71 - 1.25)."

Interaction Variable (factor with four levels)

Choose variable

# Demo shown for categorical variable
# Split the trajectory by the er_bin variable
condition <- "er_bin"

# Get the index for the column we want to split the trajectory on
colSplit <- which(colnames(dataLong) %in% condition)

# Make sure this column is a factor and replace some characters that may cause a problem with an underscore
modelData <- dataLong %>%
            filter(!is.na(!!sym(condition))) %>%
            mutate(!!condition := factor( str_replace_all(.[[colSplit]], " |-|/", "_") ) ) 

Model Results

# Run the model using lmer
fit <- lmer(formula = "score ~ age + er_bin + age*er_bin + (1 + age | subject)",
                          REML=F ,
                          data = modelData,
                          control=lmerControl(optimizer="bobyqa",
                                              optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: score ~ age + er_bin + age * er_bin + (1 + age | subject)
##    Data: modelData
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  206968.5  207075.6 -103472.2  206944.5     55525 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6767 -0.5290 -0.1954  0.4083  5.7020 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  subject  (Intercept) 1.22111  1.1050       
##           age         0.02218  0.1489   0.50
##  Residual             1.56268  1.2501       
## Number of obs: 55537, groups:  subject, 12720
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  1.398532   0.013044 107.218
## age          0.065771   0.002357  27.901
## er_bin1      1.146156   0.048045  23.856
## er_bin2      0.565151   0.034293  16.480
## er_bin3      1.406815   0.049819  28.239
## age:er_bin1  0.039434   0.008644   4.562
## age:er_bin2 -0.016025   0.006197  -2.586
## age:er_bin3 -0.007698   0.009110  -0.845
## 
## Correlation of Fixed Effects:
##             (Intr) age    er_bn1 er_bn2 er_bn3 ag:r_1 ag:r_2
## age          0.337                                          
## er_bin1     -0.271 -0.091                                   
## er_bin2     -0.380 -0.128  0.103                            
## er_bin3     -0.262 -0.088  0.071  0.100                     
## age:er_bin1 -0.092 -0.273  0.331  0.035  0.024              
## age:er_bin2 -0.128 -0.380  0.035  0.336  0.034  0.104       
## age:er_bin3 -0.087 -0.259  0.024  0.033  0.344  0.071  0.098
# Fixed effects
cbind(
            tidy(fit, "fixed"),
            confint(fit, "beta_", method = "Wald")) %>%
            mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
            mutate(p.z = ifelse(p.z < 0.001, "p < 0.001", round(p.z, 3) ))
##             effect        term     estimate   std.error   statistic       2.5 %
## (Intercept)  fixed (Intercept)  1.398532016 0.013043794 107.2181905  1.37296665
## age          fixed         age  0.065771213 0.002357291  27.9011799  0.06115101
## er_bin1      fixed     er_bin1  1.146155877 0.048044798  23.8559831  1.05198980
## er_bin2      fixed     er_bin2  0.565151267 0.034293043  16.4800559  0.49793814
## er_bin3      fixed     er_bin3  1.406815244 0.049819026  28.2385134  1.30917175
## age:er_bin1  fixed age:er_bin1  0.039433875 0.008643715   4.5621443  0.02249251
## age:er_bin2  fixed age:er_bin2 -0.016024970 0.006196509  -2.5861287 -0.02816990
## age:er_bin3  fixed age:er_bin3 -0.007698113 0.009110100  -0.8450086 -0.02555358
##                   97.5 %       p.z
## (Intercept)  1.424097383 p < 0.001
## age          0.070391419 p < 0.001
## er_bin1      1.240321950 p < 0.001
## er_bin2      0.632364396 p < 0.001
## er_bin3      1.504458742 p < 0.001
## age:er_bin1  0.056375246 p < 0.001
## age:er_bin2 -0.003880035      0.01
## age:er_bin3  0.010157354     0.398
# Random effects
randomDF <- as.data.frame(VarCorr(fit),
                        order = "lower.tri") %>%
            mutate(across(where(is.numeric), round, 3))
colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
randomDF
##      Level   Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1  subject (Intercept)      <NA>               1.221                  1.105
## 2  subject (Intercept)       age               0.082                  0.497
## 3  subject         age      <NA>               0.022                  0.149
## 4 Residual        <NA>      <NA>               1.563                  1.250
# add the "predicted" column to this dataset (it's not really a prediction because its the same dataset, it just shows the model)

# Pull out the age column
ageVec <- modelData %>% pull(!!age)

# Get the coefficients of the model
coef <- summary(fit)$coefficients

# Get the predicted column (note we did this manually in case covariates were added). If covariates were added then essentially this predicted "zero" variable is when the covariates are set to zero.

zero <- ageVec * coef[2,1] + coef[1,1]

# Get the row index from the coefficients table that correspond to the condition we are splitting on
rowIndex <- which(str_detect(string = row.names(coef),
                             pattern = condition) &
                  str_detect(string = row.names(coef),
                             pattern = ":", negate = T))

# Same as above but for the interaction term
rowIndexInteract1 <- which(str_detect(string = row.names(coef),
                                              pattern = condition) &
                                     str_detect(string = row.names(coef),
                                                pattern = ":") &
                                     str_detect(string = row.names(coef),
                                                pattern = "\\^", negate = T))

# How many levels does out condition have, eg. 2 if options are only male or female in this example
 n <- length(unique(pull(modelData, !!sym(condition))))
 
 # Calculate the prediction for the other level, ie. when the covariate is not set to zero
 predCovs <- lapply(1:(n-1), function(i){

              coef[1,1] +
              (ageVec * coef[2,1]) +
              coef[rowIndex[i],1] +
              (ageVec * coef[rowIndexInteract1[i],1])
 })

# Name of level, in this case female is coded as 0 or 1
num <- str_subset(row.names(coef), condition) %>%
        sub(paste0(".*", condition), "", .) %>%
        unique()

# Name the list by the the level it corresponds to
names(predCovs) <- paste0(condition, "_", num)

# Add these as new columns to the dataset and make a pred column which gives the correct predicted value for the corresponding condition level, ie. the female predicted value if the participant is female.
modelDataEdit <- cbind(modelData, do.call(cbind, predCovs)) %>%
            mutate(zero = zero) %>%
            mutate(!!condition := as.factor(.[[colSplit]]) ) %>%
            mutate(pred =  eval(parse(text =
                                        paste0(paste0("ifelse(", condition, " == '", num, "', ", condition, "_",num,",", collapse = " "), "zero",
                                               paste0(rep(")", length(num)), collapse = ""), collapse = "")
            )))

Plot

# use glht to calculate 95% CIs

# Pull out the age column      
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

# Determine which ages we want to calculate 95% CI for
# Don't need to do it for every participant as age when age is fairly similar, instead do it as a sequence of 0.5 for every unit in age.
# This speeds up the calculation time
ageCalcs <-  c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )

# Go through each age and calculate 95% CI
CI_glht  <- lapply(ageCalcs, function(x){

          # Get number of levels in condition
          n <- length(unique(pull(modelDataEdit, !!sym(condition))))

          # Get corresponding rows for condition in coefficients table
          rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                       pattern = condition) &
                              str_detect(string = row.names(summary(fit)$coefficients),
                                         pattern = ":", negate = T))

          rowNames <- rownames(summary(fit)$coefficients)

          ageInput <- round(x - mean(ageOrig), 3)

          equations <- sapply(1:(n-1), function(i){
                paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " + \`", rowNames[rowIndex[i]] , "\` + \`", rowNames[2], ":", rowNames[rowIndex[i]],"\`*",ageInput , " == 0")
              })
          res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " == 0"), equations) )
          
          # Tidy results dataframe and rename rows/columns
          res <- tidy(confint(res))

          rowname <- paste0("Score (", traj, ")")
          
          levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

            res <-  res %>%
              mutate(condition = levelNames)
          
            })

ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
ageCalcs <-  c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

estimate <- do.call(rbind, CI_glht) %>%
          mutate(age = rep(ageCalcs, each = length(levelNames)) ) 
ggplot() +
              geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred, color = !!sym(condition) ) , na.rm=T) +
              geom_ribbon(data = estimate ,
                          aes(x= age , ymin = conf.low, ymax = conf.high, fill = condition), alpha = 0.2, na.rm = T, show.legend = FALSE) +
              scale_color_manual(values = c("#E69F00","#56B4E9","#009E73","#F5C710")) +
              scale_fill_manual(values = c("#E69F00","#56B4E9","#009E73","#F5C710")) +
              geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
              geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
              geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
              theme(legend.text = element_text(color = "black"))+
              ylab(paste0("Score (", traj, ")")) +
              xlab("Age")

Scores at ages

# Select ages to calculate scores for (same as the checkboxes on the app)
ageInputScore <- c(4, 9, 14)

# use glht to calculate scores at ages

ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]

score_glht <- lapply(as.numeric(ageInputScore), function(x){

          n <- length(unique(pull(modelDataEdit, !!sym(condition))))

          rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                       pattern = condition) &
                              str_detect(string = row.names(summary(fit)$coefficients),
                                         pattern = ":", negate = T))

          rowNames <- rownames(summary(fit)$coefficients)

          ageInput <- round(x - mean(ageOrig), 3)

          equations <- sapply(1:(n-1), function(i){
                paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", rowNames[rowIndex[i]] , " + ", rowNames[2], ":", rowNames[rowIndex[i]],"*",ageInput , " == 0")
              })
            res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0"), equations) )
 
          # Tidy results dataframe and rename rows/columns
          res <- tidy(confint(res))

          rowname <- paste0("Score (", traj, ")")

          levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))

          res <-  res %>%
            mutate(contrast = paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)")) %>%
            column_to_rownames(var = "contrast")
})

# Plot the scores at the given age        
estimate <- lapply(score_glht, function(df) {
            df %>%
              dplyr::select(estimate)
          })  %>% do.call(cbind, .)

colnames(estimate) <- ageInputScore

estimate <- estimate %>%
              gather(age, score, 1:ncol(estimate)) %>%
              mutate(age = as.numeric(age))

conf.low <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInputScore
conf.low  <- conf.low %>%
              gather(age, conf.low, 1:ncol(conf.low)) %>%
              mutate(age = as.numeric(age))
## Warning: attributes are not identical across measure variables; they will be
## dropped
conf.high <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInputScore
conf.high  <- conf.high %>%
              gather(age, conf.high, 1:ncol(conf.high)) %>%
              mutate(age = as.numeric(age))%>%
              dplyr::select(-age)
## Warning: attributes are not identical across measure variables; they will be
## dropped
conf <- cbind(conf.low, conf.high, "age")

# Plot
ggplot() +
   geom_line(data = modelDataEdit, aes(x= age_original ,  y = pred, color = !!sym(condition) ) , na.rm=T) +
   theme(legend.text = element_text(color = "black")) +
   geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
   geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
   ylab(paste0("Score (", traj, ")")) +
   xlab("Age")

# Table
estimateCI <- lapply(score_glht, function(df) {
            df %>%
              mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
              dplyr::select(estimateCI)
          })  %>% do.call(cbind, .)
colnames(estimateCI) <- ageInputScore
estimateCI
##                                                                  4
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.163 (1.131 - 1.195)
## Score (score) [er_bin, level = 1 ] (95% CIs) 2.168 (2.053 - 2.282)
## Score (score) [er_bin, level = 2 ] (95% CIs) 1.786 (1.707 - 1.864)
## Score (score) [er_bin, level = 3 ] (95% CIs) 2.597 (2.479 - 2.716)
##                                                                  9
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.492 (1.456 - 1.528)
## Score (score) [er_bin, level = 1 ] (95% CIs) 2.694 (2.566 - 2.822)
## Score (score) [er_bin, level = 2 ] (95% CIs) 2.034 (1.946 - 2.122)
## Score (score) [er_bin, level = 3 ] (95% CIs) 2.888 (2.754 - 3.021)
##                                                                 14
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.821 (1.763 - 1.878)
## Score (score) [er_bin, level = 1 ] (95% CIs)  3.22 (3.017 - 3.423)
## Score (score) [er_bin, level = 2 ] (95% CIs) 2.283 (2.143 - 2.423)
## Score (score) [er_bin, level = 3 ] (95% CIs) 3.178 (2.964 - 3.392)
# Difference in scores at ages (with 95% CIs)
# "Select two levels from your factor to calculate the difference in scores:"
levelsScores <- c(0,3)
differenceScores <- lapply(as.numeric(ageInputScore), function(x){
            ageInput <- round(x - mean(ageOrig), 3)

            coef <- summary(fit)$coefficients

            rowIndex <- which(str_detect(string = row.names(coef),
                                         pattern = condition) &
                                str_detect(string = row.names(coef),
                                           pattern = ":", negate = T))

            rowNames <- rownames(coef) %>%
              str_remove_all("I|\\(|\\^|\\)|\\:")

            levelNames <- paste0(condition,levelsScores) %>%
              str_remove_all("I|\\(|\\^|\\)|\\:")

            levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]

            res <- deltaMethod(fit, c(paste0(
                  "(", rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", levelNames1[1], " + ", levelNames1[2], "*", ageInput, ") - (",rowNames[1], " + ", rowNames[2], "*", ageInput, ")"
                )), parameterNames = rowNames )

            dif <- paste0( round(res$Estimate, 3), " (", round(res$`2.5 %`,3), " - ", round(res$`97.5 %`,3), ")")
            res <- data.frame(age = dif)
return(res)
})   

difTab <- do.call(cbind, differenceScores)
colnames(difTab) <- ageInputScore
rownames(difTab) <- paste0("Difference between ",levelsScores[1]," and ", levelsScores[2]," (95% CI)")
difTab
##                                                         4                     9
## Difference between 0 and 3 (95% CI) 1.434 (1.338 - 1.531) 1.396 (1.287 - 1.505)
##                                                        14
## Difference between 0 and 3 (95% CI) 1.357 (1.183 - 1.532)

Area under the curve

# Select ages to calculate AUC for (same as the slider bar on the app)
AUCages <- c(4,10)

coef <- summary(fit)$coefficients

ageOrig <- modelDataEdit %>%
          pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)

rowNames <- rownames(coef) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

AUC <- deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)

AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
        
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                   pattern = condition) &
                          str_detect(string = row.names(summary(fit)$coefficients),
                                     pattern = ":", negate = T))

rowIndexInteract1 <- which(str_detect(string = row.names(summary(fit)$coefficients),
                                            pattern = condition) &
                                   str_detect(string = row.names(summary(fit)$coefficients),
                                              pattern = ":") &
                                   str_detect(string = row.names(summary(fit)$coefficients),
                                              pattern = "\\^", negate = T))
      
n <- length(unique(pull(modelDataEdit, !!sym(condition))))

AUCCovs <- lapply(1:(n-1), function(i){
         
            AUC <- deltaMethod(fit,
                               c( paste0(
                                 "(((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age2,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age2,")^2/2) ) -
                                   (((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age1,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age1,")^2/2) )"
                               ) )
                               , parameterNames = rowNames )

            AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
            AUC
        })
        
AUC_delta <- list(AUC = AUC, AUCCovs = AUCCovs)
       
# Plot
ggplot(data = modelDataEdit) +
            geom_ribbon(data = modelDataEdit,
                        aes(x = age_original, ymax = pred, ymin = 0, fill = !!sym(condition)),
                        alpha = 0.1, show.legend = FALSE) +
            coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
            geom_line(aes(x = age_original, y = pred, color = !!sym(condition)), na.rm = TRUE) +
            theme(legend.text = element_text(color = "black")) +
            ylab(paste0("Score (", traj, ")")) +
            xlab("Age") +
            scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
                               expand = c(0, 0))+
            scale_y_continuous(expand = c(0, 0))

# Table
df <- t(
            cbind(
              data.frame(paste0(AUCages[1], " - ", AUCages[2]),
                         AUC_delta$AUC),
              do.call(cbind, AUC_delta$AUCCovs)
            )
)

rowname <- paste0("AUC (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, condition))))
rownames(df) <- c("Age Range", paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)") )
df
##                                            [,1]                   
## Age Range                                  "4 - 10"               
## AUC (score) [er_bin, level = 0 ] (95% CIs) "8.16 (8.01 - 8.31)"   
## AUC (score) [er_bin, level = 1 ] (95% CIs) "14.9 (14.37 - 15.43)" 
## AUC (score) [er_bin, level = 2 ] (95% CIs) "11.61 (11.25 - 11.97)"
## AUC (score) [er_bin, level = 3 ] (95% CIs) "16.63 (16.08 - 17.18)"
# -------
# Difference in AUC
# "Select two levels from your factor to calculate the difference in AUC between:"
levelsAUC <- c(0,3)

levelNames <- paste0(condition,levelsAUC) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

coef <- summary(fit)$coefficients

ageOrig <- modelDataEdit %>%
          pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)

rowNames <- rownames(coef) %>%
          str_remove_all("I|\\(|\\^|\\)|\\:")

levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]

res <- deltaMethod(fit, c( paste0(" (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2)   + ( (",levelNames1[1],")*(",age2,") ) + ((",levelNames1[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2)   + ( (",levelNames1[1],")*(",age1,") ) + ((",levelNames1[2],")*(",age1,")^2/2) )) - (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2)     )) ") ) ,parameterNames = rowNames)
    
dif <- paste0( round(res$Estimate, 2), " 95% CI: (", round(res$`2.5 %`,2), " - ", round(res$`97.5 %`,2), ")")
statement <- paste0("The difference between the two factor levels (for the age ranges ", AUCages[1], " - ", AUCages[2] ,") is ", dif ,".")
statement
## [1] "The difference between the two factor levels (for the age ranges 4 - 10) is 8.47 95% CI: (7.9 - 9.04)."