TIDAL Code
Download an R markdown script of the code below
Overview
Introduction
This script is to demonstrate some of the code that happens in the background of the application, using the simulated dataset and a simple linear model.
Data Formatting
Upload data in wide format and convert to long format
# Load in wide format dataframe
dataWide <- read.csv("data/emot_reg_emot_simulated.csv")
# Gather sdq_t columns into long format (this is a questionnaire score at each time point)
dataScore <- dataWide %>%
gather(score_cat_test_col, score, all_of(c("sdq_t1", "sdq_t2", "sdq_t3", "sdq_t4", "sdq_t5")))
# Gather age_t columns into long format (this is age at each time point)
dataLong <- dataWide %>%
gather(time_point, age, all_of(c("age_t1", "age_t2", "age_t3", "age_t4", "age_t5")))
# Merge them into one dataframe
dataLong[,"score"] <- dataScore[,"score"]
# Have a look at the dataframe
head(dataLong)
## subject sdq_t1 sdq_t2 sdq_t3 sdq_t4 sdq_t5 female er_total_t1 er_total_t2
## 1 2 0 2 NA 0 0 1 1 6
## 2 3 2 3 1 3 2 0 4 4
## 3 4 0 0 NA 0 0 0 4 2
## 4 5 0 0 0 2 NA 0 4 5
## 5 6 3 1 1 1 1 0 2 2
## 6 8 0 1 1 4 2 0 5 2
## er_t1_bin er_t2_bin er_bin time_point age score
## 1 0 0 0 age_t1 3.041096 0
## 2 0 0 0 age_t1 3.126027 2
## 3 0 0 0 age_t1 3.172603 0
## 4 0 0 0 age_t1 3.049315 0
## 5 0 0 0 age_t1 3.065753 3
## 6 0 0 0 age_t1 3.027397 0
Data Exploration
Choose variables for modelling
# This is similar to the drop down selections on the Data Exploration page.
ID <- "subject"
traj <- "score"
age <- "age"
modelType <- "Linear"
timePoint <- "time_point"
# Mean center age (this helps the model fit)
dataLong <- dataLong %>%
mutate(age_original = as.numeric(!!sym(age)) ) %>%
mutate(!!sym(age) := as.numeric(!!sym(age)) - mean( as.numeric(!!sym(age)), na.rm = T ))
Descriptive Statistics
# Table:
dataLong %>%
group_by(across( !!timePoint )) %>%
summarise(N = sum(!is.na( !!sym(traj) )),
mean = mean(!!sym(traj), na.rm = T),
SD = sd(!!sym(traj), na.rm = T),
median = median(!!sym(traj), na.rm = T),
IQR = IQR(!!sym(traj), na.rm = T)
)
## # A tibble: 5 × 6
## time_point N mean SD median IQR
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 age_t1 12637 1.41 1.55 1 2
## 2 age_t2 12682 1.37 1.59 1 2
## 3 age_t3 10855 1.52 1.77 1 2
## 4 age_t4 10296 1.89 2.03 1 3
## 5 age_t5 9070 2.04 2.14 1 3
# Plot
df.plot <- dataLong %>%
group_by(across( !!timePoint )) %>%
summarise(Age = mean(age_original, na.rm = T),
Phenotype = mean(!!sym(traj), na.rm = T),
SD = sd(!!sym(traj), na.rm = T),
n = sum(!is.na( !!sym(traj) ))) %>%
mutate(upper = Phenotype + ( qnorm(0.975)*SD/sqrt(n) ),
lower = Phenotype - ( qnorm(0.975)*SD/sqrt(n) ))
ggplot(df.plot,aes(x=Age, y=Phenotype)) +
geom_point()+
geom_line() +
geom_errorbar(aes(ymin = lower, ymax = upper))+
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
Model Results
Run the model using lme4
# Run the model using lmer
fit <- lmer(formula = "score ~ age + (1 + age | subject)",
REML=F ,
data = dataLong,
control=lmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: score ~ age + (1 + age | subject)
## Data: dataLong
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 208467.1 208520.6 -104227.5 208455.1 55531
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7416 -0.5239 -0.1961 0.4038 5.7814
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 1.4036 1.1847
## age 0.0223 0.1493 0.47
## Residual 1.5628 1.2501
## Number of obs: 55537, groups: subject, 12720
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.618345 0.011963 135.28
## age 0.065644 0.002055 31.95
##
## Correlation of Fixed Effects:
## (Intr)
## age 0.323
N observations and groups
paste0("The number of observations (measurements) is ",
format(summary(fit)$devcomp$dims[[1]], big.mark=",", scientific=FALSE),
" and the number of groups (people) is ",
format(summary(fit)$ngrps[[1]], big.mark=",", scientific=FALSE) ,
".")
## [1] "The number of observations (measurements) is 55,537 and the number of groups (people) is 12,720."
Fixed effects
cbind(
tidy(fit, "fixed"),
confint(fit, "beta_", method = "Wald")) %>%
mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
mutate(p.z = ifelse(p.z <= 0, "p < 0.001", round(p.z, 3)))
## effect term estimate std.error statistic 2.5 %
## (Intercept) fixed (Intercept) 1.61834513 0.011962990 135.27931 1.59489810
## age fixed age 0.06564445 0.002054591 31.95014 0.06161753
## 97.5 % p.z
## (Intercept) 1.64179216 p < 0.001
## age 0.06967138 p < 0.001
Random effects
randomDF <- as.data.frame(VarCorr(fit),
order = "lower.tri") %>%
mutate(across(where(is.numeric), round, 3))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(where(is.numeric), round, 3)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
randomDF
## Level Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1 subject (Intercept) <NA> 1.404 1.185
## 2 subject (Intercept) age 0.083 0.468
## 3 subject age <NA> 0.022 0.149
## 4 Residual <NA> <NA> 1.563 1.250
Plot
# Add the "prediction"/model col to dataframe (adjustedScore)
# The alternative is to use the "predict" function, but this didn't work when users wanted to add categorical covariates.
# We had to calculate it manually instead. Below is just an example of how you do this for a linear model with no covariates.
# Pull out the mean centered age column
ageVec <- dataLong %>% pull(!!age)
# Get the coefficients from the model
coef <- summary(fit)$coefficients
# Calculate the model mean/prediciton. This is age multiplied by the slope plus the intercept.
adjustedScore <- ageVec * coef[2,1] + coef[1,1]
# Add this as a new column to the dataframe
modelDataEdit <- dataLong %>%
mutate(pred = adjustedScore)
# Add 95% CIs for these estimates using glht
ageOrig <- dataLong %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
# Get a vector of the ages we want to calculate 95% CIs for (instead of doing all the ages in the column)
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
# Go through each age and calculate the 95% CI
score_glht <- lapply(ageCalcs, function(x){
# Get the row names of the coefficients to put in the glht function (this is Intercept and age)
rowNames <- rownames(summary(fit)$coefficients)
# Round age to 3 decimal places
ageInput <- round(x - mean(ageOrig), 3)
# Use glht to get 95% CIs
# Essentially we are doing the intercept + slope * age == 0 as it is a linear model
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0")) )
# Tidy up the dataframe
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
res <- res %>%
mutate(contrast = paste0(rowname, " (95% CIs)")) %>%
column_to_rownames(var = "contrast") %>%
mutate(across(where(is.numeric), round, 2))
return(res)
})
# Put everything into one dataframe
estimate <- do.call(rbind, score_glht) %>%
mutate(age = ageCalcs)
# Plot the mean trajectory and it's 95% CIs and the underlying descriptive data:
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred), color = "#1D86C7", linewidth = 1.5, na.rm=T) +
geom_ribbon(data = estimate, aes(x= age , ymin = conf.low, ymax = conf.high), fill = "#1D86C7", alpha = 0.2, na.rm = T) +
geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
Scores at ages
# Choose some ages to calculate (these are equivalent to the checkboxes on the app)
ageInput <- c(4, 7, 12)
# Go through each age and let's calculate the score at that age using glht (similar to getting 95% CIs as above)
score <- lapply(as.numeric(ageInput), function(x){
# Get the row names that correspond to the coefficients, ie. Intercept, Age (slope)
rowNames <- rownames(summary(fit)$coefficients)
# Round age and mean center it so it's on the correct scale
ageInputRound <- round(x - mean(ageOrig), 3)
# Essentially we are doing the intercept + slope * age == 0 as it is a linear model
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInputRound, " == 0")) )
# Tidy up the data frame by reformatting it and labelling it sensibly
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
res <- res %>%
mutate(contrast = paste0(rowname, " (95% CIs)")) %>%
column_to_rownames(var = "contrast")
})
# Tidy it up into one dataframe
estimate <- lapply(score, function(df) {
df %>%
dplyr::select(estimate)
}) %>% do.call(cbind, .)
colnames(estimate) <- ageInput
estimate <- estimate %>%
gather(age, score, 1:ncol(estimate)) %>%
mutate(age = as.numeric(age))
# Put the CIs into separate dataframes ready for plotting
conf.low <- lapply(score, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInput
conf.low <- conf.low %>%
gather(age, conf.low, 1:ncol(conf.low)) %>%
mutate(age = as.numeric(age))
conf.high <- lapply(score, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInput
conf.high <- conf.high %>%
gather(age, conf.high, 1:ncol(conf.high)) %>%
mutate(age = as.numeric(age))%>%
dplyr::select(-age)
conf <- cbind(conf.low, conf.high, "age")
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred ) , na.rm=T) +
theme(legend.text = element_text(color = "black")) +
geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
# Tidy up the estimate and 95% CIs to display nicely to the user
estimateCI <- lapply(score, function(df) {
df %>%
mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
dplyr::select(estimateCI)
}) %>% do.call(cbind, .)
colnames(estimateCI) <- ageInput
estimateCI
## 4 7
## Score (score) (95% CIs) 1.383 (1.36 - 1.406) 1.58 (1.557 - 1.603)
## 12
## Score (score) (95% CIs) 1.908 (1.875 - 1.942)
Area under the curve
# Choose 2 ages to calculate AUC for (this is the slider on the app)
AUCages <- c(4,10)
# Calculate the AUC for the ages the user has chosen
coef <- summary(fit)$coefficients
# Pull out the age column from the dataframe
ageOrig <- modelDataEdit %>%
pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
# Minus mean age so the age is on the same scale as that used in the model
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)
# Remove any characters that means the deltaMethod function wont work
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)")
# Use the delta method to calculate AUC
# AUC is calculate by doing age2 (the larger age) * intercept + slope * age2^2/2 minus age1 (the younger age) * intercept + slope * age2^2/2. This is for a linear model.
AUC <- car::deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)
# Format the results
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
# Plot the results
ggplot() +
geom_ribbon(data = modelDataEdit,
aes(x = age_original, ymax = pred, ymin = 0),
alpha = 0.1, show.legend = FALSE, fill = "#1D86C7") +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred ) , na.rm=T)+
coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
scale_colour_discrete(na.translate = F) +
theme(legend.text = element_text(color = "black")) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age") +
scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0))
# Format the results to display to the user
df <- t(data.frame(paste0(AUCages[1], " - ", AUCages[2]),
AUC))
rowname <- paste0("AUC (", traj, ") (95% CIs)")
rownames(df) <- c("Age Range", rowname)
df
## [,1]
## Age Range "4 - 10"
## AUC (score) (95% CIs) "9.48 (9.34 - 9.62)"
Interaction Variable (factor with two levels)
Choose variable
# Demo shown for categorical variable
# Split the trajectory by the sex variable
condition <- "female"
# Get the index for the column we want to split the trajectory on
colSplit <- which(colnames(dataLong) %in% condition)
# Make sure this column is a factor and replace some characters that may cause a problem with an underscore
modelData <- dataLong %>%
filter(!is.na(!!sym(condition))) %>%
mutate(!!condition := factor( str_replace_all(.[[colSplit]], " |-|/", "_") ) )
Model Results
# Run the model using lmer
fit <- lmer(formula = "score ~ age + female + age*female + (1 + age | subject)",
REML=F ,
data = modelData,
control=lmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: score ~ age + female + age * female + (1 + age | subject)
## Data: modelData
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 208365.6 208437.0 -104174.8 208349.6 55529
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7675 -0.5272 -0.1934 0.4059 5.7681
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 1.39424 1.1808
## age 0.02194 0.1481 0.46
## Residual 1.56320 1.2503
## Number of obs: 55537, groups: subject, 12720
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.526786 0.016785 90.961
## age 0.047477 0.002888 16.441
## female1 0.184427 0.023863 7.729
## age:female1 0.036473 0.004093 8.911
##
## Correlation of Fixed Effects:
## (Intr) age femal1
## age 0.322
## female1 -0.703 -0.226
## age:female1 -0.227 -0.705 0.319
# Fixed effects
cbind(
tidy(fit, "fixed"),
confint(fit, "beta_", method = "Wald")) %>%
mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
mutate(p.z = ifelse(p.z < 0.001, "p < 0.001", round(p.z, 3) ))
## effect term estimate std.error statistic 2.5 %
## (Intercept) fixed (Intercept) 1.52678648 0.016785153 90.960536 1.49388818
## age fixed age 0.04747730 0.002887690 16.441272 0.04181753
## female1 fixed female1 0.18442702 0.023862626 7.728697 0.13765713
## age:female1 fixed age:female1 0.03647303 0.004093226 8.910585 0.02845046
## 97.5 % p.z
## (Intercept) 1.55968477 p < 0.001
## age 0.05313707 p < 0.001
## female1 0.23119691 p < 0.001
## age:female1 0.04449561 p < 0.001
# Random effects
randomDF <- as.data.frame(VarCorr(fit),
order = "lower.tri") %>%
mutate(across(where(is.numeric), round, 3))
colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
randomDF
## Level Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1 subject (Intercept) <NA> 1.394 1.181
## 2 subject (Intercept) age 0.081 0.464
## 3 subject age <NA> 0.022 0.148
## 4 Residual <NA> <NA> 1.563 1.250
# add the "predicted" column to this dataset (it's not really a prediction because its the same dataset, it just shows the model)
# Pull out the age column
ageVec <- modelData %>% pull(!!age)
# Get the coefficients of the model
coef <- summary(fit)$coefficients
# Get the predicted column (note we did this manually in case covariates were added). If covariates were added then essentially this predicted "zero" variable is when the covariates are set to zero.
zero <- ageVec * coef[2,1] + coef[1,1]
# Get the row index from the coefficients table that correspond to the condition we are splitting on
rowIndex <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":", negate = T))
# Same as above but for the interaction term
rowIndexInteract1 <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":") &
str_detect(string = row.names(coef),
pattern = "\\^", negate = T))
# How many levels does out condition have, eg. 2 if options are only male or female in this example
n <- length(unique(pull(modelData, !!sym(condition))))
# Calculate the prediction for the other level, ie. when the covariate is not set to zero
predCovs <- lapply(1:(n-1), function(i){
coef[1,1] +
(ageVec * coef[2,1]) +
coef[rowIndex[i],1] +
(ageVec * coef[rowIndexInteract1[i],1])
})
# Name of level, in this case female is coded as 0 or 1
num <- str_subset(row.names(coef), condition) %>%
sub(paste0(".*", condition), "", .) %>%
unique()
# Name the list by the the level it corresponds to
names(predCovs) <- paste0(condition, "_", num)
# Add these as new columns to the dataset and make a pred column which gives the correct predicted value for the corresponding condition level, ie. the female predicted value if the participant is female.
modelDataEdit <- cbind(modelData, do.call(cbind, predCovs)) %>%
mutate(zero = zero) %>%
mutate(!!condition := as.factor(.[[colSplit]]) ) %>%
mutate(pred = eval(parse(text =
paste0(paste0("ifelse(", condition, " == '", num, "', ", condition, "_",num,",", collapse = " "), "zero",
paste0(rep(")", length(num)), collapse = ""), collapse = "")
)))
Plot
# use glht to calculate 95% CIs
# Pull out the age column
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
# Determine which ages we want to calculate 95% CI for
# Don't need to do it for every participant as age when age is fairly similar, instead do it as a sequence of 0.5 for every unit in age.
# This speeds up the calculation time
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
# Go through each age and calculate 95% CI
CI_glht <- lapply(ageCalcs, function(x){
# Get number of levels in condition
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
# Get corresponding rows for condition in coefficients table
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowNames <- rownames(summary(fit)$coefficients)
ageInput <- round(x - mean(ageOrig), 3)
equations <- sapply(1:(n-1), function(i){
paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " + \`", rowNames[rowIndex[i]] , "\` + \`", rowNames[2], ":", rowNames[rowIndex[i]],"\`*",ageInput , " == 0")
})
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " == 0"), equations) )
# Tidy results dataframe and rename rows/columns
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
res <- res %>%
mutate(condition = levelNames)
})
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
estimate <- do.call(rbind, CI_glht) %>%
mutate(age = rep(ageCalcs, each = length(levelNames)) )
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred, color = !!sym(condition) ) , na.rm=T) +
geom_ribbon(data = estimate ,
aes(x= age , ymin = conf.low, ymax = conf.high, fill = condition), alpha = 0.2, na.rm = T, show.legend = FALSE) +
scale_color_manual(values = c("orange", "blue")) +
scale_fill_manual(values = c("orange", "blue")) +
geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
theme(legend.text = element_text(color = "black"))+
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
Scores at ages
# Select ages to calculate scores for (same as the checkboxes on the app)
ageInputScore <- c(4, 9, 14)
# use glht to calculate scores at ages
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
score_glht <- lapply(as.numeric(ageInputScore), function(x){
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowNames <- rownames(summary(fit)$coefficients)
ageInput <- round(x - mean(ageOrig), 3)
equations <- sapply(1:(n-1), function(i){
paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", rowNames[rowIndex[i]] , " + ", rowNames[2], ":", rowNames[rowIndex[i]],"*",ageInput , " == 0")
})
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0"), equations) )
# Tidy results dataframe and rename rows/columns
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
res <- res %>%
mutate(contrast = paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)")) %>%
column_to_rownames(var = "contrast")
})
# Plot the scores at the given age
estimate <- lapply(score_glht, function(df) {
df %>%
dplyr::select(estimate)
}) %>% do.call(cbind, .)
colnames(estimate) <- ageInputScore
estimate <- estimate %>%
gather(age, score, 1:ncol(estimate)) %>%
mutate(age = as.numeric(age))
conf.low <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInputScore
conf.low <- conf.low %>%
gather(age, conf.low, 1:ncol(conf.low)) %>%
mutate(age = as.numeric(age))
conf.high <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInputScore
conf.high <- conf.high %>%
gather(age, conf.high, 1:ncol(conf.high)) %>%
mutate(age = as.numeric(age))%>%
dplyr::select(-age)
conf <- cbind(conf.low, conf.high, "age")
# Plot
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred, color = !!sym(condition) ) , na.rm=T) +
theme(legend.text = element_text(color = "black")) +
geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
# Table
estimateCI <- lapply(score_glht, function(df) {
df %>%
mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
dplyr::select(estimateCI)
}) %>% do.call(cbind, .)
colnames(estimateCI) <- ageInputScore
estimateCI
## 4
## Score (score) [female, level = 0 ] (95% CIs) 1.357 (1.32 - 1.394)
## Score (score) [female, level = 1 ] (95% CIs) 1.411 (1.373 - 1.448)
## 9
## Score (score) [female, level = 0 ] (95% CIs) 1.594 (1.553 - 1.636)
## Score (score) [female, level = 1 ] (95% CIs) 1.83 (1.789 - 1.872)
## 14
## Score (score) [female, level = 0 ] (95% CIs) 1.832 (1.767 - 1.896)
## Score (score) [female, level = 1 ] (95% CIs) 2.25 (2.186 - 2.315)
# Difference in scores at ages (with 95% CIs)
levelsScores <- c(0,1)
differenceScores <- lapply(as.numeric(ageInputScore), function(x){
ageInput <- round(x - mean(ageOrig), 3)
coef <- summary(fit)$coefficients
rowIndex <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":", negate = T))
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames <- paste0(condition,levelsScores) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]
res <- deltaMethod(fit, c(paste0(
"(", rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", levelNames1[1], " + ", levelNames1[2], "*", ageInput, ") - (",rowNames[1], " + ", rowNames[2], "*", ageInput, ")"
)), parameterNames = rowNames )
dif <- paste0( round(res$Estimate, 3), " (", round(res$`2.5 %`,3), " - ", round(res$`97.5 %`,3), ")")
res <- data.frame(age = dif)
return(res)
})
difTab <- do.call(cbind, differenceScores)
colnames(difTab) <- ageInputScore
rownames(difTab) <- paste0("Difference between ",levelsScores[1]," and ", levelsScores[2]," (95% CI)")
difTab
## 4 9
## Difference between 0 and 1 (95% CI) 0.054 (0.007 - 0.1) 0.236 (0.185 - 0.288)
## 14
## Difference between 0 and 1 (95% CI) 0.419 (0.339 - 0.498)
Area under the curve
# Select ages to calculate AUC for (same as the slider bar on the app)
AUCages <- c(4,10)
coef <- summary(fit)$coefficients
ageOrig <- modelDataEdit %>%
pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
AUC <- deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowIndexInteract1 <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":") &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = "\\^", negate = T))
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
AUCCovs <- lapply(1:(n-1), function(i){
AUC <- deltaMethod(fit,
c( paste0(
"(((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age2,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age2,")^2/2) ) -
(((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age1,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age1,")^2/2) )"
) )
, parameterNames = rowNames )
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
AUC
})
AUC_delta <- list(AUC = AUC, AUCCovs = AUCCovs)
# Plot
ggplot(data = modelDataEdit) +
geom_ribbon(data = modelDataEdit,
aes(x = age_original, ymax = pred, ymin = 0, fill = !!sym(condition)),
alpha = 0.1, show.legend = FALSE) +
coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
geom_line(aes(x = age_original, y = pred, color = !!sym(condition)), na.rm = TRUE) +
theme(legend.text = element_text(color = "black")) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age") +
scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
expand = c(0, 0))+
scale_y_continuous(expand = c(0, 0))
# Table
df <- t(
cbind(
data.frame(paste0(AUCages[1], " - ", AUCages[2]),
AUC_delta$AUC),
do.call(cbind, AUC_delta$AUCCovs)
)
)
rowname <- paste0("AUC (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, condition))))
rownames(df) <- c("Age Range", paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)") )
df
## [,1]
## Age Range "4 - 10"
## AUC (score) [female, level = 0 ] (95% CIs) "9 (8.8 - 9.19)"
## AUC (score) [female, level = 1 ] (95% CIs) "9.97 (9.78 - 10.17)"
# -------
# Difference in AUC
levelsAUC <- c(0,1)
levelNames <- paste0(condition,levelsAUC) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
coef <- summary(fit)$coefficients
ageOrig <- modelDataEdit %>%
pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]
res <- deltaMethod(fit, c( paste0(" (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ( (",levelNames1[1],")*(",age2,") ) + ((",levelNames1[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ( (",levelNames1[1],")*(",age1,") ) + ((",levelNames1[2],")*(",age1,")^2/2) )) - (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) )) ") ) ,parameterNames = rowNames)
dif <- paste0( round(res$Estimate, 2), " 95% CI: (", round(res$`2.5 %`,2), " - ", round(res$`97.5 %`,2), ")")
statement <- paste0("The difference between the two factor levels (for the age ranges ", AUCages[1], " - ", AUCages[2] ,") is ", dif ,".")
statement
## [1] "The difference between the two factor levels (for the age ranges 4 - 10) is 0.98 95% CI: (0.71 - 1.25)."
Interaction Variable (factor with four levels)
Choose variable
# Demo shown for categorical variable
# Split the trajectory by the er_bin variable
condition <- "er_bin"
# Get the index for the column we want to split the trajectory on
colSplit <- which(colnames(dataLong) %in% condition)
# Make sure this column is a factor and replace some characters that may cause a problem with an underscore
modelData <- dataLong %>%
filter(!is.na(!!sym(condition))) %>%
mutate(!!condition := factor( str_replace_all(.[[colSplit]], " |-|/", "_") ) )
Model Results
# Run the model using lmer
fit <- lmer(formula = "score ~ age + er_bin + age*er_bin + (1 + age | subject)",
REML=F ,
data = modelData,
control=lmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)))
# Summary of the model:
summary(fit)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: score ~ age + er_bin + age * er_bin + (1 + age | subject)
## Data: modelData
## Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
##
## AIC BIC logLik deviance df.resid
## 206968.5 207075.6 -103472.2 206944.5 55525
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6767 -0.5290 -0.1954 0.4083 5.7020
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 1.22111 1.1050
## age 0.02218 0.1489 0.50
## Residual 1.56268 1.2501
## Number of obs: 55537, groups: subject, 12720
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.398532 0.013044 107.218
## age 0.065771 0.002357 27.901
## er_bin1 1.146156 0.048045 23.856
## er_bin2 0.565151 0.034293 16.480
## er_bin3 1.406815 0.049819 28.239
## age:er_bin1 0.039434 0.008644 4.562
## age:er_bin2 -0.016025 0.006197 -2.586
## age:er_bin3 -0.007698 0.009110 -0.845
##
## Correlation of Fixed Effects:
## (Intr) age er_bn1 er_bn2 er_bn3 ag:r_1 ag:r_2
## age 0.337
## er_bin1 -0.271 -0.091
## er_bin2 -0.380 -0.128 0.103
## er_bin3 -0.262 -0.088 0.071 0.100
## age:er_bin1 -0.092 -0.273 0.331 0.035 0.024
## age:er_bin2 -0.128 -0.380 0.035 0.336 0.034 0.104
## age:er_bin3 -0.087 -0.259 0.024 0.033 0.344 0.071 0.098
# Fixed effects
cbind(
tidy(fit, "fixed"),
confint(fit, "beta_", method = "Wald")) %>%
mutate(p.z = 2 * (1 - pnorm(abs(statistic)))) %>%
mutate(p.z = ifelse(p.z < 0.001, "p < 0.001", round(p.z, 3) ))
## effect term estimate std.error statistic 2.5 %
## (Intercept) fixed (Intercept) 1.398532016 0.013043794 107.2181905 1.37296665
## age fixed age 0.065771213 0.002357291 27.9011799 0.06115101
## er_bin1 fixed er_bin1 1.146155877 0.048044798 23.8559831 1.05198980
## er_bin2 fixed er_bin2 0.565151267 0.034293043 16.4800559 0.49793814
## er_bin3 fixed er_bin3 1.406815244 0.049819026 28.2385134 1.30917175
## age:er_bin1 fixed age:er_bin1 0.039433875 0.008643715 4.5621443 0.02249251
## age:er_bin2 fixed age:er_bin2 -0.016024970 0.006196509 -2.5861287 -0.02816990
## age:er_bin3 fixed age:er_bin3 -0.007698113 0.009110100 -0.8450086 -0.02555358
## 97.5 % p.z
## (Intercept) 1.424097383 p < 0.001
## age 0.070391419 p < 0.001
## er_bin1 1.240321950 p < 0.001
## er_bin2 0.632364396 p < 0.001
## er_bin3 1.504458742 p < 0.001
## age:er_bin1 0.056375246 p < 0.001
## age:er_bin2 -0.003880035 0.01
## age:er_bin3 0.010157354 0.398
# Random effects
randomDF <- as.data.frame(VarCorr(fit),
order = "lower.tri") %>%
mutate(across(where(is.numeric), round, 3))
colnames(randomDF) <- c("Level", "Variable1", "Variable2", "Variance/Covariance", "SD Variance/Covariance")
randomDF
## Level Variable1 Variable2 Variance/Covariance SD Variance/Covariance
## 1 subject (Intercept) <NA> 1.221 1.105
## 2 subject (Intercept) age 0.082 0.497
## 3 subject age <NA> 0.022 0.149
## 4 Residual <NA> <NA> 1.563 1.250
# add the "predicted" column to this dataset (it's not really a prediction because its the same dataset, it just shows the model)
# Pull out the age column
ageVec <- modelData %>% pull(!!age)
# Get the coefficients of the model
coef <- summary(fit)$coefficients
# Get the predicted column (note we did this manually in case covariates were added). If covariates were added then essentially this predicted "zero" variable is when the covariates are set to zero.
zero <- ageVec * coef[2,1] + coef[1,1]
# Get the row index from the coefficients table that correspond to the condition we are splitting on
rowIndex <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":", negate = T))
# Same as above but for the interaction term
rowIndexInteract1 <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":") &
str_detect(string = row.names(coef),
pattern = "\\^", negate = T))
# How many levels does out condition have, eg. 2 if options are only male or female in this example
n <- length(unique(pull(modelData, !!sym(condition))))
# Calculate the prediction for the other level, ie. when the covariate is not set to zero
predCovs <- lapply(1:(n-1), function(i){
coef[1,1] +
(ageVec * coef[2,1]) +
coef[rowIndex[i],1] +
(ageVec * coef[rowIndexInteract1[i],1])
})
# Name of level, in this case female is coded as 0 or 1
num <- str_subset(row.names(coef), condition) %>%
sub(paste0(".*", condition), "", .) %>%
unique()
# Name the list by the the level it corresponds to
names(predCovs) <- paste0(condition, "_", num)
# Add these as new columns to the dataset and make a pred column which gives the correct predicted value for the corresponding condition level, ie. the female predicted value if the participant is female.
modelDataEdit <- cbind(modelData, do.call(cbind, predCovs)) %>%
mutate(zero = zero) %>%
mutate(!!condition := as.factor(.[[colSplit]]) ) %>%
mutate(pred = eval(parse(text =
paste0(paste0("ifelse(", condition, " == '", num, "', ", condition, "_",num,",", collapse = " "), "zero",
paste0(rep(")", length(num)), collapse = ""), collapse = "")
)))
Plot
# use glht to calculate 95% CIs
# Pull out the age column
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
# Determine which ages we want to calculate 95% CI for
# Don't need to do it for every participant as age when age is fairly similar, instead do it as a sequence of 0.5 for every unit in age.
# This speeds up the calculation time
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
# Go through each age and calculate 95% CI
CI_glht <- lapply(ageCalcs, function(x){
# Get number of levels in condition
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
# Get corresponding rows for condition in coefficients table
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowNames <- rownames(summary(fit)$coefficients)
ageInput <- round(x - mean(ageOrig), 3)
equations <- sapply(1:(n-1), function(i){
paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " + \`", rowNames[rowIndex[i]] , "\` + \`", rowNames[2], ":", rowNames[rowIndex[i]],"\`*",ageInput , " == 0")
})
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + \`", rowNames[2], "\`*", ageInput, " == 0"), equations) )
# Tidy results dataframe and rename rows/columns
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
res <- res %>%
mutate(condition = levelNames)
})
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
ageCalcs <- c(min(ageOrig), seq(round(min(ageOrig), 1), round(max(ageOrig), 1), 0.5), max(ageOrig) )
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
estimate <- do.call(rbind, CI_glht) %>%
mutate(age = rep(ageCalcs, each = length(levelNames)) )
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred, color = !!sym(condition) ) , na.rm=T) +
geom_ribbon(data = estimate ,
aes(x= age , ymin = conf.low, ymax = conf.high, fill = condition), alpha = 0.2, na.rm = T, show.legend = FALSE) +
scale_color_manual(values = c("#E69F00","#56B4E9","#009E73","#F5C710")) +
scale_fill_manual(values = c("#E69F00","#56B4E9","#009E73","#F5C710")) +
geom_point(data = df.plot,aes(x=Age, y=Phenotype))+
geom_line(data = df.plot,aes(x=Age, y=Phenotype)) +
geom_errorbar(data = df.plot, aes(x=Age, y=Phenotype, ymin = lower, ymax = upper)) +
theme(legend.text = element_text(color = "black"))+
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
Scores at ages
# Select ages to calculate scores for (same as the checkboxes on the app)
ageInputScore <- c(4, 9, 14)
# use glht to calculate scores at ages
ageOrig <- modelDataEdit %>% pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
score_glht <- lapply(as.numeric(ageInputScore), function(x){
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowNames <- rownames(summary(fit)$coefficients)
ageInput <- round(x - mean(ageOrig), 3)
equations <- sapply(1:(n-1), function(i){
paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", rowNames[rowIndex[i]] , " + ", rowNames[2], ":", rowNames[rowIndex[i]],"*",ageInput , " == 0")
})
res <- multcomp::glht(fit, linfct = c( paste0(rowNames[1], " + ", rowNames[2], "*", ageInput, " == 0"), equations) )
# Tidy results dataframe and rename rows/columns
res <- tidy(confint(res))
rowname <- paste0("Score (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, !!sym(condition)))))
res <- res %>%
mutate(contrast = paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)")) %>%
column_to_rownames(var = "contrast")
})
# Plot the scores at the given age
estimate <- lapply(score_glht, function(df) {
df %>%
dplyr::select(estimate)
}) %>% do.call(cbind, .)
colnames(estimate) <- ageInputScore
estimate <- estimate %>%
gather(age, score, 1:ncol(estimate)) %>%
mutate(age = as.numeric(age))
conf.low <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.low)}) %>% do.call(cbind, .)
colnames(conf.low) <- ageInputScore
conf.low <- conf.low %>%
gather(age, conf.low, 1:ncol(conf.low)) %>%
mutate(age = as.numeric(age))
## Warning: attributes are not identical across measure variables; they will be
## dropped
conf.high <- lapply(score_glht, function(df) {df %>% dplyr::select(conf.high)}) %>% do.call(cbind, .)
colnames(conf.high) <- ageInputScore
conf.high <- conf.high %>%
gather(age, conf.high, 1:ncol(conf.high)) %>%
mutate(age = as.numeric(age))%>%
dplyr::select(-age)
## Warning: attributes are not identical across measure variables; they will be
## dropped
conf <- cbind(conf.low, conf.high, "age")
# Plot
ggplot() +
geom_line(data = modelDataEdit, aes(x= age_original , y = pred, color = !!sym(condition) ) , na.rm=T) +
theme(legend.text = element_text(color = "black")) +
geom_errorbar(data = conf, aes(x = age, ymin = conf.low, ymax = conf.high), width = 0.5) +
geom_point(data = estimate, aes(x = age, y = score), col = "#1D86C7", size = 5) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age")
# Table
estimateCI <- lapply(score_glht, function(df) {
df %>%
mutate(estimateCI = paste0(round(estimate,3) , " (", round(conf.low,3) , " - ", round(conf.high,3) , ")")) %>%
dplyr::select(estimateCI)
}) %>% do.call(cbind, .)
colnames(estimateCI) <- ageInputScore
estimateCI
## 4
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.163 (1.131 - 1.195)
## Score (score) [er_bin, level = 1 ] (95% CIs) 2.168 (2.053 - 2.282)
## Score (score) [er_bin, level = 2 ] (95% CIs) 1.786 (1.707 - 1.864)
## Score (score) [er_bin, level = 3 ] (95% CIs) 2.597 (2.479 - 2.716)
## 9
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.492 (1.456 - 1.528)
## Score (score) [er_bin, level = 1 ] (95% CIs) 2.694 (2.566 - 2.822)
## Score (score) [er_bin, level = 2 ] (95% CIs) 2.034 (1.946 - 2.122)
## Score (score) [er_bin, level = 3 ] (95% CIs) 2.888 (2.754 - 3.021)
## 14
## Score (score) [er_bin, level = 0 ] (95% CIs) 1.821 (1.763 - 1.878)
## Score (score) [er_bin, level = 1 ] (95% CIs) 3.22 (3.017 - 3.423)
## Score (score) [er_bin, level = 2 ] (95% CIs) 2.283 (2.143 - 2.423)
## Score (score) [er_bin, level = 3 ] (95% CIs) 3.178 (2.964 - 3.392)
# Difference in scores at ages (with 95% CIs)
# "Select two levels from your factor to calculate the difference in scores:"
levelsScores <- c(0,3)
differenceScores <- lapply(as.numeric(ageInputScore), function(x){
ageInput <- round(x - mean(ageOrig), 3)
coef <- summary(fit)$coefficients
rowIndex <- which(str_detect(string = row.names(coef),
pattern = condition) &
str_detect(string = row.names(coef),
pattern = ":", negate = T))
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames <- paste0(condition,levelsScores) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]
res <- deltaMethod(fit, c(paste0(
"(", rowNames[1], " + ", rowNames[2], "*", ageInput, " + ", levelNames1[1], " + ", levelNames1[2], "*", ageInput, ") - (",rowNames[1], " + ", rowNames[2], "*", ageInput, ")"
)), parameterNames = rowNames )
dif <- paste0( round(res$Estimate, 3), " (", round(res$`2.5 %`,3), " - ", round(res$`97.5 %`,3), ")")
res <- data.frame(age = dif)
return(res)
})
difTab <- do.call(cbind, differenceScores)
colnames(difTab) <- ageInputScore
rownames(difTab) <- paste0("Difference between ",levelsScores[1]," and ", levelsScores[2]," (95% CI)")
difTab
## 4 9
## Difference between 0 and 3 (95% CI) 1.434 (1.338 - 1.531) 1.396 (1.287 - 1.505)
## 14
## Difference between 0 and 3 (95% CI) 1.357 (1.183 - 1.532)
Area under the curve
# Select ages to calculate AUC for (same as the slider bar on the app)
AUCages <- c(4,10)
coef <- summary(fit)$coefficients
ageOrig <- modelDataEdit %>%
pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
AUC <- deltaMethod(fit, c( paste0("(((", age2, ")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age2, ")^2/2)) - (((", age1,")*(", rowNames[1], ")) + ((", rowNames[2], ")*(", age1, ")^2/2))") ) , parameterNames = rowNames)
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
rowIndex <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":", negate = T))
rowIndexInteract1 <- which(str_detect(string = row.names(summary(fit)$coefficients),
pattern = condition) &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = ":") &
str_detect(string = row.names(summary(fit)$coefficients),
pattern = "\\^", negate = T))
n <- length(unique(pull(modelDataEdit, !!sym(condition))))
AUCCovs <- lapply(1:(n-1), function(i){
AUC <- deltaMethod(fit,
c( paste0(
"(((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age2,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age2,")^2/2) ) -
(((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ((",rowNames[rowIndex[i]],")*(",age1,")) + ((",rowNames[rowIndexInteract1[i]],")*(",age1,")^2/2) )"
) )
, parameterNames = rowNames )
AUC <- paste0( round(AUC$Estimate, 2), " (", round(AUC$`2.5 %`,2), " - ", round(AUC$`97.5 %`,2), ")")
AUC
})
AUC_delta <- list(AUC = AUC, AUCCovs = AUCCovs)
# Plot
ggplot(data = modelDataEdit) +
geom_ribbon(data = modelDataEdit,
aes(x = age_original, ymax = pred, ymin = 0, fill = !!sym(condition)),
alpha = 0.1, show.legend = FALSE) +
coord_cartesian(xlim = c(AUCages[1], AUCages[2])) +
geom_line(aes(x = age_original, y = pred, color = !!sym(condition)), na.rm = TRUE) +
theme(legend.text = element_text(color = "black")) +
ylab(paste0("Score (", traj, ")")) +
xlab("Age") +
scale_x_continuous(breaks = seq(round(min(modelDataEdit$age_original, na.rm =T)), round(max(modelDataEdit$age_original, na.rm =T)), by = 1),
expand = c(0, 0))+
scale_y_continuous(expand = c(0, 0))
# Table
df <- t(
cbind(
data.frame(paste0(AUCages[1], " - ", AUCages[2]),
AUC_delta$AUC),
do.call(cbind, AUC_delta$AUCCovs)
)
)
rowname <- paste0("AUC (", traj, ")")
levelNames <- as.character(levels(as.factor(pull(modelDataEdit, condition))))
rownames(df) <- c("Age Range", paste0(rowname, " [", condition, ", level = ", levelNames, " ] (95% CIs)") )
df
## [,1]
## Age Range "4 - 10"
## AUC (score) [er_bin, level = 0 ] (95% CIs) "8.16 (8.01 - 8.31)"
## AUC (score) [er_bin, level = 1 ] (95% CIs) "14.9 (14.37 - 15.43)"
## AUC (score) [er_bin, level = 2 ] (95% CIs) "11.61 (11.25 - 11.97)"
## AUC (score) [er_bin, level = 3 ] (95% CIs) "16.63 (16.08 - 17.18)"
# -------
# Difference in AUC
# "Select two levels from your factor to calculate the difference in AUC between:"
levelsAUC <- c(0,3)
levelNames <- paste0(condition,levelsAUC) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
coef <- summary(fit)$coefficients
ageOrig <- modelDataEdit %>%
pull(age_original)
ageOrig <- ageOrig[!is.na(ageOrig)]
age1 <- AUCages[1] - mean(ageOrig)
age2 <- AUCages[2] - mean(ageOrig)
rowNames <- rownames(coef) %>%
str_remove_all("I|\\(|\\^|\\)|\\:")
levelNames1 <- rowNames[str_detect(rowNames, paste0(levelNames, collapse = "|"))]
res <- deltaMethod(fit, c( paste0(" (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) + ( (",levelNames1[1],")*(",age2,") ) + ((",levelNames1[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) + ( (",levelNames1[1],")*(",age1,") ) + ((",levelNames1[2],")*(",age1,")^2/2) )) - (( ((",age2,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age2,")^2/2) ) - ( ((",age1,")*(",rowNames[1],")) + ((",rowNames[2],")*(",age1,")^2/2) )) ") ) ,parameterNames = rowNames)
dif <- paste0( round(res$Estimate, 2), " 95% CI: (", round(res$`2.5 %`,2), " - ", round(res$`97.5 %`,2), ")")
statement <- paste0("The difference between the two factor levels (for the age ranges ", AUCages[1], " - ", AUCages[2] ,") is ", dif ,".")
statement
## [1] "The difference between the two factor levels (for the age ranges 4 - 10) is 8.47 95% CI: (7.9 - 9.04)."