This notebook conducts and demonstrates the results of the Random-effects meta-analysis performed on each healthcare system’s locally run covariate adjusted Kaplan-Meier survival curves.
Of note, this analysis is performed only on adult patients due to the pediatric cohort’s low incidence of both poor health outcomes and neurological diagnoses during COVID-19 hospitalization.
library(tidyverse)
library(metafor)
library(meta)
library(egg)
library(DT)
library(ggpubr)
source("R/forester_custom.R")
source("R/analyze_survival.R")
Each participating healthcare system ran the analysis locally on patient level data using our customized R package. The output of each local analysis consisted of only the summary results (rather than patient level data). The model summary results from each healthcare system were then aggregated and are imported and then combined using the following code:
# read in files from results folder
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
path = "results",
pattern = ".rda",
full.names = TRUE
)
for (rda in rdas) {
load(rda)
}
rm(rdas, rda)
# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"
# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)
# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
filter(!is.na(date_v4_received)) %>%
pull(siteid) %>%
paste("results", sep = "_")
# list sites without race
sites_wo_race <- site_params %>%
filter(!include_race) %>%
pull(siteid)
# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))
## read in pt counts
pt_counts_df <- read.csv('tables/site_pt_counts.csv')
outcomes <-
c(
"time_first_discharge_reg_elix",
"deceased_reg_elix"
)
In this analysis, we only included a healthcare system if they had >= 3 adult patients with a neurological diagnosis.
## adults
sites_adult <- pt_counts_df %>%
filter(population == "Adult") %>%
mutate(neuro_sum = n_var_Central + n_var_Peripheral) %>%
filter(!neuro_sum < 3) %>%
distinct(site) %>%
mutate(site = gsub("_results", "", site))
sites_adult_results <- sites_adult %>%
mutate(site = paste0(site, "_results")) %>%
as.vector()
adult_results <- results[grepl(paste(sites_adult_results$site, collapse = "|"), names(results))]
This notebook specifies the parameters to load in the Cox-PH results
using a censor_cutoff
=‘90’ (days since initial
hospitalization) and the comorb_method
=‘ind’ which adjusts
for patient comorbidity burden by treating each individual comorbidity
of the Elixhauser Comorbidity Index as an individual model covariate
(binary variable indicating whether or not the patient previously was
diagnosed with the respective comorbidity).
cox_results_adults <- list()
for (outcome_i in outcomes) {
cox_results_adults[[outcome_i]] <-
adult_results %>%
lapply(get_cox_row, population = "adults", comorb_method = "ind", censor_cutoff = "90", outcome = outcome_i) %>%
bind_rows() %>%
mutate(outcome = outcome_i)
}
Similarly, we will will load in the KM results while specifying the
censor_cutoff
=‘90’ and
comorb_method
=‘ind’.
res_dat_surv <- bind_rows(km_results_adults) %>%
add_count(site, strata, outcome, name = "n_timepoints") %>%
# add rows to fill in missing time points (this ensures all sites will have up to the censor date)
complete(time, site, nesting(strata, outcome)) %>%
group_by(site, strata, outcome) %>%
arrange(time) %>%
# carry down the survival and standard errors to fill in missing time periods
fill(surv, .direction = "down") %>%
fill(std.err.sqrt, n_timepoints, .direction = "down") %>%
fill(std.err, n_timepoints, .direction = "down") %>%
ungroup() %>%
mutate(
strata = toupper(strata),
strata = factor(strata,
levels=c("NONE", "CNS", "PNS"),
labels=c("NNC", "CNS", "PNS"))
)
Here we will specify several parameters to aid in our construction of the survival figure
# calculate CIs
zval <- qnorm(1- (1-0.95)/2, 0, 1)
standard_error <- function(x) sd(x, na.rm = TRUE) / sqrt(length(x)) # Create own function
# set plot colors
group.colors <- c(NNC = "#BBBBBC", PNS = "slateblue", CNS ="tomato")
Here we will weight each site by the inverse of its variance calculated from the CNS and PNS meta-analysis models. We will take the average of the CNS and PNS weights
load("survival_model/adults_deceased_reg_elix_ind_90_CNS.rda")
load("survival_model/adults_deceased_reg_elix_ind_90_PNS.rda")
load("survival_model/adults_time_first_discharge_reg_elix_ind_90_CNS.rda")
load("survival_model/adults_time_first_discharge_reg_elix_ind_90_PNS.rda")
# rename weight column
cns_weights <- adults_deceased_reg_elix_ind_90_CNS %>%
rbind(., adults_time_first_discharge_reg_elix_ind_90_CNS) %>%
rename("cns_weight" = Weight.random) %>%
select(-strata)
pns_weights <- adults_deceased_reg_elix_ind_90_PNS %>%
rbind(., adults_time_first_discharge_reg_elix_ind_90_PNS) %>%
rename("pns_weight" = Weight.random) %>%
select(-strata)
## average the weights
avg_weights <- cns_weights %>%
left_join(., pns_weights, by = c("analysis", "studlab")) %>%
mutate(cns_weight = as.numeric(cns_weight),
pns_weight = as.numeric(pns_weight),
avg_weight = rowMeans(cbind(cns_weight, pns_weight), na.rm = TRUE)) %>%
rename("outcome" = analysis,
"site" = studlab)
meta_surv_df <- res_dat_surv %>%
left_join(., avg_weights, by = c("site", "outcome")) %>%
ungroup() %>%
as.data.frame() %>%
mutate(
outcome = factor(outcome, levels = outcomes) %>%
fct_recode(
"Mortality" = "deceased_reg_elix",
"Discharge" = "time_first_discharge_reg_elix"
)) %>%
ungroup()
Random effects meta analysis function
# chuan's code
## y is the vector of beta coefficients;
## s is the vector of SE of beta coefficients;
## wt is the weight (e.g, number of events, make sure it does not change over time)
# Meg added the rule to handle cases when v.between is a negative number based on the following reference which explains:
# https://bookdown.org/MathiasHarrer/Doing_Meta_Analysis_in_R/heterogeneity.html
# the value of I2 cannot be lower than 0%, so if Q happens to be smaller than K-1, we simply use 0 instead of a negative value.
metaFUN.randomeffect=function(y, s, wt){
mm=c(y%*%wt/sum(wt))
df=length(y)-1
C=sum(wt)-sum((wt)^2)/sum(wt)
Q=wt%*%(y-mm)^2
k = length(y)
if (Q < k-1) {
v.between=0
} else {
v.between=c((Q-df)/C); v.between # variance between (tau^2)
}
ww=1/(s^2+v.between) # random-effects weight
ss=sqrt(1/sum(ww)) # sum of squares
## MH addition based on link above and discussion of T^2 approximating X^2 distribution##
#ci_low = qchisq(0.025, df=df)
#ci_high = qchisq(0.975, df=df)
data.frame(pool_mean=mm, pool_se=ss) #pool_high = ci_high, tau2=v.between, ww=ww)
}
meta_surv_ci_df <- meta_surv_df %>%
group_by(strata, outcome, time) %>%
mutate(
der_std.err = std.err*surv, # average std.err of the cumulative hazard * average surv probability
m_surv = metaFUN.randomeffect(surv, der_std.err, avg_weight)$pool_mean,
m_se = metaFUN.randomeffect(surv, der_std.err, avg_weight)$pool_se,
ci_l = m_surv - m_se*zval,
ci_u = m_surv + m_se*zval) %>%
ungroup() %>%
distinct(outcome, strata, time, m_surv, m_se, ci_l, ci_u)
surv_curves <- meta_surv_ci_df %>%
ggplot(aes(
x = time, y = m_surv,
color = strata,
fill = strata,
)) +
geom_line(aes()) +
geom_ribbon(aes(ymin = ci_l, ymax = ci_u, fill = strata), alpha = 0.2, linetype = 0, show.legend = FALSE) +
labs(color = NULL, y = NULL) +
scale_colour_manual(values = group.colors) +
scale_fill_manual(values = group.colors) +
facet_wrap(~outcome, ncol = 3) +
coord_cartesian(clip = "off", ylim = c(0, 1), expand = FALSE) +
xlab("Time (days)") +
ylab("Event Probability") +
scale_x_continuous(limits = c(0, 90), breaks = c(0, 30, 60, 90)) +
theme_classic() +
labs(color = "Neurological Status",
fill = "") +
theme(legend.position="top",
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10, face = "bold"),
strip.text = element_text(color = "black", size = 12, face = "bold"),
panel.spacing = unit(60, "pt"),
axis.title.x = element_text(size = 13, face = "bold"),
axis.title.y = element_text(size = 13, face = "bold"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(color = "black", size = 10 )); surv_curves
ggsave("figures/survival_curves.png", surv_curves, height = 5, width = 6)
breaks_list <- list("Discharge" = c(0, 6, 8, 11, 30, 60, 90),
"Mortality" = c(0, 12, 24, 30, 60, 90))
# determine median time where at least 50% patients were discharged
median_data_discharge <- meta_surv_ci_df %>%
group_by(strata, outcome) %>%
filter(outcome == "Discharge") %>%
summarize(median_time = min(time[m_surv <= 0.5])) %>%
mutate(yend = 0.5,
y = 0.5)
#determine median time where at least 10% patients died
median_data_mortality <- meta_surv_ci_df %>%
group_by(strata, outcome) %>%
filter(outcome == "Mortality") %>%
summarize(median_time = min(time[m_surv <= 0.9])) %>%
mutate(yend = .9,
y = 0.9)
median_data <- rbind(median_data_discharge, median_data_mortality) %>%
filter(!is.infinite(median_time))
# plot individual survival plots
discharge_plot <- meta_surv_ci_df %>%
filter(outcome == 'Discharge') %>%
ggplot(aes(
x = time, y = m_surv,
color = strata,
fill = strata,
)) +
geom_line() +
geom_ribbon(aes(ymin = ci_l, ymax = ci_u, fill = strata), alpha = 0.2, linetype = 0, show.legend = FALSE) +
labs(color = NULL, y = NULL) +
scale_colour_manual(values = group.colors) +
scale_fill_manual(values = group.colors) +
coord_cartesian(clip = "off", ylim = c(0, 1), expand = FALSE) +
xlab("Time (days)") +
ylab("Event Probability") +
scale_x_continuous(limits = c(0, 90), breaks = breaks_list$Discharge) + #breaks = breaks_fun) +
theme_classic() +
labs(color = "Neurological Status",
fill = "") +
ggtitle("Discharge") +
theme(plot.title = element_text(face = "bold", hjust=0.5, size = 16),
legend.position="top",
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 8, face = "bold"),
strip.text = element_text(color = "black", size = 12, face = "bold"),
panel.spacing = unit(60, "pt"),
axis.title.x = element_text(size = 13, face = "bold"),
axis.title.y = element_text(size = 13, face = "bold"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(
color = c("black","#BBBBBC", "slateblue", "tomato", "black", "black", "black"),
size = 10,
face = c("bold", "plain", "plain", "plain", "bold", "bold", "bold"))
) +
geom_segment(
aes(x = median_time, xend = median_time, y = 0, yend = yend, color = strata),
data = median_data %>% filter(outcome == "Discharge"),
linetype = "dashed",
size = 0.5
) +
geom_segment(
aes(x = median_time, xend = 0, y = y, yend = yend, color = strata),
data = median_data %>% filter(outcome == "Discharge"),
linetype = "dashed",
size = 0.5
)
mortality_plot <- meta_surv_ci_df %>%
filter(outcome == 'Mortality') %>%
ggplot(aes(
x = time, y = m_surv,
color = strata,
fill = strata,
)) +
geom_line() +
geom_ribbon(aes(ymin = ci_l, ymax = ci_u, fill = strata), alpha = 0.2, linetype = 0, show.legend = FALSE) +
labs(color = NULL, y = NULL) +
scale_colour_manual(values = group.colors) +
scale_fill_manual(values = group.colors) +
coord_cartesian(clip = "off", ylim = c(0, 1), expand = FALSE) +
xlab("Time (days)") +
ylab("Event Probability") +
scale_x_continuous(limits = c(0, 90), breaks = breaks_list$Mortality) + #breaks = breaks_fun) +
theme_classic() +
labs(color = "Neurological Status",
fill = "") +
ggtitle("Mortality") +
theme(plot.title = element_text(face = "bold", hjust=0.5, size = 16),
legend.position="none",
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10, face = "bold"),
strip.text = element_text(color = "black", size = 12, face = "bold"),
panel.spacing = unit(60, "pt"),
axis.title.x = element_text(size = 13, face = "bold"),
axis.title.y = element_text(size = 13, face = "bold"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(
color = c("black", "tomato", "#BBBBBC", "black", "black", "black"),
size = 10,
face = c("bold", "plain", "plain", "bold", "bold", "bold"))
) +
geom_segment(
aes(x = median_time, xend = median_time, y = 0, yend = yend, color = strata),
data = median_data %>% filter(outcome == "Mortality"),
linetype = "dashed",
size = 0.5
) +
geom_segment(
aes(x = median_time, xend = 0, y = y, yend = yend, color = strata),
data = median_data %>% filter(outcome == "Mortality"),
linetype = "dashed",
size = 0.5
)
# combine plots into one figure
survival_curves_time <- ggarrange(discharge_plot, mortality_plot, ncol=2, nrow=1, common.legend = TRUE, legend="top") %>%
ggexport(filename = "figures/survival_curves_time.pdf")
ggsave("figures/survival_curves_times.png", ggarrange(discharge_plot, mortality_plot, ncol=2, nrow=1, common.legend = TRUE, legend="top"), height = 4, width = 8)
ggarrange(discharge_plot, mortality_plot, ncol=2, nrow=1, common.legend = TRUE, legend="top")
risk_table <- list()
for (outcome_i in outcomes) {
risk_table[[outcome_i]] <-
adult_results %>%
lapply(get_risk_table, population = "adults", comorb_method = "ind", censor_cutoff = "90", outcome = outcome_i) %>%
bind_rows() %>%
mutate(outcome = outcome_i)
}
# tidy table
risk_table_tidy <- bind_rows(risk_table) %>%
mutate(
outcome = factor(outcome, levels = outcomes) %>%
fct_recode(
"Mortality" = "deceased_reg_elix",
"Discharge" = "time_first_discharge_reg_elix"
),
strata = factor(strata) %>%
fct_recode(
"NNC" = "neuro_post=None",
"CNS" = "neuro_post=Central",
"PNS" = "neuro_post=Peripheral"
)
) %>%
# in some cases, sites did not have patients past 60 days -- thus the cumulative at T=90 is off - i need to carry forward the previous sum.event and n.risk?
complete(time, site, nesting(strata, outcome)) %>%
arrange(outcome, site, strata) %>%
fill(n.risk, cum.n.event, .direction = "down") %>%
group_by(outcome, strata, time) %>%
mutate(sum.n.risk = sum(n.risk, na.rm = TRUE),
sum.event = sum(cum.n.event, na.rm = TRUE),
sum.censor = sum(cum.n.censor, na.rm = TRUE)) %>%
ungroup() %>%
distinct(outcome, time, strata, sum.n.risk, sum.event, sum.censor)
write.csv(risk_table_tidy, "tables/risk_table_90_ind.csv", row.names = FALSE)
datatable(risk_table_tidy)